From 52338d6ef5354b1441e00536ebe0f24e7bf48911 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 28 Oct 2025 15:27:43 +0100 Subject: [PATCH 01/10] Started to remove whitespace --- src/dialect/snowflake.rs | 2 +- src/parser/mod.rs | 63 +------ src/tokenizer.rs | 324 ++++------------------------------- tests/sqlparser_snowflake.rs | 11 -- 4 files changed, 40 insertions(+), 360 deletions(-) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 825fd45f0..6cb344fac 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -1051,7 +1051,7 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result break, + Token::SemiColon => break, Token::Period => { parser.prev_token(); break; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b44171c7d..ab981f9f5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4034,13 +4034,6 @@ impl<'a> Parser<'a> { core::array::from_fn(|_| loop { let token = self.tokens.get(index); index += 1; - if let Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) = token - { - continue; - } break token.cloned().unwrap_or(TokenWithSpan { token: Token::EOF, span: Span::empty(), @@ -4057,13 +4050,6 @@ impl<'a> Parser<'a> { core::array::from_fn(|_| loop { let token = self.tokens.get(index); index += 1; - if let Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) = token - { - continue; - } break token.unwrap_or(&EOF_TOKEN); }) } @@ -4078,18 +4064,10 @@ impl<'a> Parser<'a> { let mut index = self.index; loop { index += 1; - match self.tokens.get(index - 1) { - Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) => continue, - non_whitespace => { - if n == 0 { - return non_whitespace.unwrap_or(&EOF_TOKEN); - } - n -= 1; - } + if n == 0 { + return self.tokens.get(index - 1).unwrap_or(&EOF_TOKEN); } + n -= 1; } } @@ -4147,16 +4125,7 @@ impl<'a> Parser<'a> { /// /// See [`Self::get_current_token`] to get the current token after advancing pub fn advance_token(&mut self) { - loop { - self.index += 1; - match self.tokens.get(self.index - 1) { - Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) => continue, - _ => break, - } - } + self.index += 1; } /// Returns a reference to the current token @@ -4187,18 +4156,8 @@ impl<'a> Parser<'a> { /// // TODO rename to backup_token and deprecate prev_token? pub fn prev_token(&mut self) { - loop { - assert!(self.index > 0); - self.index -= 1; - if let Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) = self.tokens.get(self.index) - { - continue; - } - return; - } + assert!(self.index > 0); + self.index -= 1; } /// Report `found` was encountered instead of `expected` @@ -9999,14 +9958,6 @@ impl<'a> Parser<'a> { let mut content = String::from(""); while let Some(t) = self.next_token_no_skip().map(|t| &t.token) { match t { - Token::Whitespace(Whitespace::Tab) => { - values.push(Some(content.to_string())); - content.clear(); - } - Token::Whitespace(Whitespace::Newline) => { - values.push(Some(content.to_string())); - content.clear(); - } Token::Backslash => { if self.consume_token(&Token::Period) { return values; @@ -11396,7 +11347,7 @@ impl<'a> Parser<'a> { // otherwise foo-123a will be parsed as `foo-123` with the alias `a`. 
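// A minimal sketch of the behavior this commit establishes, assuming the
// crate's public Tokenizer API as exercised by the test suite below: whitespace
// is consumed during tokenization and never enters the token stream, which is
// what lets the parser cursor methods above shrink to plain index arithmetic.
//
//     use sqlparser::dialect::GenericDialect;
//     use sqlparser::tokenizer::{Token, Tokenizer};
//
//     let dialect = GenericDialect {};
//     // The "  \t " between the keyword and the literal produces no tokens.
//     let tokens = Tokenizer::new(&dialect, "SELECT  \t 1").tokenize().unwrap();
//     assert_eq!(
//         tokens,
//         vec![
//             Token::make_keyword("SELECT"),
//             Token::Number(String::from("1"), false),
//         ]
//     );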
if requires_whitespace { let token = self.next_token(); - if !matches!(token.token, Token::EOF | Token::Whitespace(_)) { + if !matches!(token.token, Token::EOF) { return self .expected("whitespace following hyphenated identifier", token); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54a158c1f..451545157 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -106,8 +106,6 @@ pub enum Token { HexStringLiteral(String), /// Comma Comma, - /// Whitespace (space, tab, etc) - Whitespace(Whitespace), /// Double equals sign `==` DoubleEq, /// Equality operator `=` @@ -304,7 +302,6 @@ impl fmt::Display for Token { Token::TripleSingleQuotedRawStringLiteral(ref s) => write!(f, "R'''{s}'''"), Token::TripleDoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"\"\"{s}\"\"\""), Token::Comma => f.write_str(","), - Token::Whitespace(ws) => write!(f, "{ws}"), Token::DoubleEq => f.write_str("=="), Token::Spaceship => f.write_str("<=>"), Token::Eq => f.write_str("="), @@ -449,29 +446,6 @@ impl Word { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum Whitespace { - Space, - Newline, - Tab, - SingleLineComment { comment: String, prefix: String }, - MultiLineComment(String), -} - -impl fmt::Display for Whitespace { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Whitespace::Space => f.write_str(" "), - Whitespace::Newline => f.write_str("\n"), - Whitespace::Tab => f.write_str("\t"), - Whitespace::SingleLineComment { prefix, comment } => write!(f, "{prefix}{comment}"), - Whitespace::MultiLineComment(s) => write!(f, "/*{s}*/"), - } - } -} - /// Location in input string /// /// # Create an "empty" (unknown) `Location` @@ -898,7 +872,7 @@ impl<'a> Tokenizer<'a> { }; let mut location = state.location(); - while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? { + while let Some(token) = self.next_token(&mut location, &mut state, buf.last().map(|t| &t.token), false)? 
{ let span = location.span_to(state.location()); buf.push(TokenWithSpan { token, span }); @@ -937,22 +911,18 @@ impl<'a> Tokenizer<'a> { /// Get the next token or return None fn next_token( &self, + location: &mut Location, chars: &mut State, prev_token: Option<&Token>, + preceded_by_whitespace: bool, ) -> Result, TokenizerError> { match chars.peek() { Some(&ch) => match ch { - ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)), - '\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)), - '\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)), - '\r' => { - // Emit a single Whitespace::Newline token for \r and \r\n - chars.next(); - if let Some('\n') = chars.peek() { - chars.next(); - } - Ok(Some(Token::Whitespace(Whitespace::Newline))) - } + ' ' | '\t' | '\n' | '\r' => { + chars.next(); // consume + *location = chars.location(); + self.next_token(location, chars, prev_token, true) + }, // BigQuery and MySQL use b or B for byte string literal, Postgres for bit strings b @ 'B' | b @ 'b' if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | MySqlDialect | GenericDialect) => { @@ -1331,13 +1301,10 @@ impl<'a> Tokenizer<'a> { if is_comment { chars.next(); // consume second '-' - let comment = self.tokenize_single_line_comment(chars); - return Ok(Some(Token::Whitespace( - Whitespace::SingleLineComment { - prefix: "--".to_owned(), - comment, - }, - ))); + // Consume the rest of the line as comment + let _comment = self.tokenize_single_line_comment(chars); + *location = chars.location(); + return self.next_token(location, chars, prev_token, true); } self.start_binop(chars, "-", Token::Minus) @@ -1358,15 +1325,16 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('*') => { chars.next(); // consume the '*', starting a multi-line comment - self.tokenize_multiline_comment(chars) + let _comment = self.consume_multiline_comment(chars)?; + *location = chars.location(); + self.next_token(location, chars, prev_token, true) } Some('/') if dialect_of!(self is SnowflakeDialect) => { chars.next(); // consume the second '/', starting a snowflake single-line comment - let comment = self.tokenize_single_line_comment(chars); - Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { - prefix: "//".to_owned(), - comment, - }))) + // Consume the rest of the line as comment + let _comment = self.tokenize_single_line_comment(chars); + *location = chars.location(); + self.next_token(location, chars, prev_token, true) } Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => { self.consume_and_return(chars, Token::DuckIntDiv) @@ -1567,11 +1535,10 @@ impl<'a> Tokenizer<'a> { '#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) => { chars.next(); // consume the '#', starting a snowflake single-line comment - let comment = self.tokenize_single_line_comment(chars); - Ok(Some(Token::Whitespace(Whitespace::SingleLineComment { - prefix: "#".to_owned(), - comment, - }))) + // Consume the rest of the line as comment + let _comment = self.tokenize_single_line_comment(chars); + *location = chars.location(); + self.next_token(location, chars, prev_token, true) } '~' => { chars.next(); // consume @@ -1701,7 +1668,9 @@ impl<'a> Tokenizer<'a> { // whitespace check (including unicode chars) should be last as it covers some of the chars above ch if ch.is_whitespace() => { - self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)) + chars.next(); // consume + *location = chars.location(); + 
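// The hunk above replaces the whitespace token arms with a consume-and-recurse
// step that also resynchronizes `location`, so each emitted token's span starts
// at its first significant character. A self-contained sketch of that pattern,
// written iteratively, with `Cursor` as a stand-in for the crate's `State`
// (not the actual implementation):
//
//     struct Cursor {
//         src: Vec<char>,
//         pos: usize,
//     }
//
//     impl Cursor {
//         fn peek(&self) -> Option<char> {
//             self.src.get(self.pos).copied()
//         }
//
//         // Skip trivia first and only then record the position, so the
//         // caller's span starts at the first significant character.
//         fn next_significant(&mut self) -> Option<(usize, char)> {
//             while matches!(self.peek(), Some(' ' | '\t' | '\n' | '\r')) {
//                 self.pos += 1; // consume trivia without emitting a token
//             }
//             let start = self.pos;
//             let ch = self.peek()?;
//             self.pos += 1;
//             Some((start, ch))
//         }
//     }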
self.next_token(location, chars, prev_token, true) } other => self.consume_and_return(chars, Token::Char(other)), }, @@ -2101,10 +2070,10 @@ impl<'a> Tokenizer<'a> { self.tokenizer_error(error_loc, "Unterminated string literal") } - fn tokenize_multiline_comment( + fn consume_multiline_comment( &self, chars: &mut State, - ) -> Result, TokenizerError> { + ) -> Result, TokenizerError> { let mut s = String::new(); let mut nested = 1; let supports_nested_comments = self.dialect.supports_nested_comments(); @@ -2121,7 +2090,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume the '/' nested -= 1; if nested == 0 { - break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s)))); + break Ok(Some(s)); } s.push('*'); s.push('/'); @@ -2444,7 +2413,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; @@ -2459,7 +2427,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from(".1"), false), ]; @@ -2475,7 +2442,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Word(Word { value: "foo".to_string(), quote_style: None, @@ -2496,7 +2462,6 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("10".to_string(), false), Token::make_word("_000", None), ]; @@ -2506,17 +2471,13 @@ mod tests { "SELECT 10_000, _10_000, 10_00_, 10___0", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("10_000".to_string(), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("_10_000", None), // leading underscore tokenizes as a word (parsed as column identifier) Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number("10_00".to_string(), false), Token::make_word("_", None), // trailing underscores tokenizes as a word (syntax error in some dialects) Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number("10".to_string(), false), Token::make_word("___0", None), // multiple underscores tokenizes as a word (syntax error in some dialects) ], @@ -2531,24 +2492,18 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e10"), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e-10"), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e+10"), false), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), Token::make_word("ea", None), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e-10"), false), Token::make_word("a", None), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1e-10"), false), Token::Minus, Token::Number(String::from("10"), false), @@ -2565,7 +2520,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("sqrt", None), Token::LParen, Token::Number(String::from("1"), false), @@ -2583,11 +2537,8 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString(String::from("a")), - Token::Whitespace(Whitespace::Space), Token::StringConcat, - Token::Whitespace(Whitespace::Space), 
Token::SingleQuotedString(String::from("b")), ]; @@ -2601,15 +2552,10 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("one", None), - Token::Whitespace(Whitespace::Space), Token::Pipe, - Token::Whitespace(Whitespace::Space), Token::make_word("two", None), - Token::Whitespace(Whitespace::Space), Token::Caret, - Token::Whitespace(Whitespace::Space), Token::make_word("three", None), ]; compare(expected, tokens); @@ -2624,32 +2570,20 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_keyword("false"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("XOR"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("true"), ]; compare(expected, tokens); @@ -2663,23 +2597,14 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("id", None), - Token::Whitespace(Whitespace::Space), Token::Eq, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), - Token::Whitespace(Whitespace::Space), Token::make_keyword("LIMIT"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("5"), false), ]; @@ -2694,21 +2619,13 @@ mod tests { let expected = vec![ Token::make_keyword("EXPLAIN"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("id", None), - Token::Whitespace(Whitespace::Space), Token::Eq, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; @@ -2723,23 +2640,14 @@ mod tests { let expected = vec![ Token::make_keyword("EXPLAIN"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("ANALYZE"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("id", None), - Token::Whitespace(Whitespace::Space), Token::Eq, - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; @@ -2754,19 +2662,12 @@ mod 
tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("customer", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("WHERE"), - Token::Whitespace(Whitespace::Space), Token::make_word("salary", None), - Token::Whitespace(Whitespace::Space), Token::Neq, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString(String::from("Not Provided")), ]; @@ -2781,7 +2682,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ - Token::Whitespace(Whitespace::Newline), Token::Char('💝'), Token::make_word("مصطفىh", None), ]; @@ -2839,16 +2739,10 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); // println!("tokens: {:#?}", tokens); let expected = vec![ - Token::Whitespace(Whitespace::Newline), - Token::Whitespace(Whitespace::Newline), Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("table"), - Token::Whitespace(Whitespace::Tab), Token::Char('💝'), Token::make_word("مصطفىh", None), ]; @@ -2862,7 +2756,6 @@ mod tests { String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$"), vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(), tag: Some("tag".into()), @@ -2873,7 +2766,6 @@ mod tests { String::from("SELECT $abc$x$ab$abc$"), vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "x$ab".into(), tag: Some("abc".into()), @@ -2884,7 +2776,6 @@ mod tests { String::from("SELECT $abc$$abc$"), vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "".into(), tag: Some("abc".into()), @@ -2961,16 +2852,12 @@ mod tests { tokens, vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Placeholder("$$".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Placeholder("$$ABC$$".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Placeholder("$ABC$".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::Placeholder("$ABC".into()), ] ); @@ -2983,7 +2870,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "dollar $nested$ string".into(), tag: Some("tag".into()), @@ -2999,7 +2885,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "".into(), tag: None, @@ -3016,7 +2901,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::DollarQuotedString(DollarQuotedString { value: "within dollar '$' quoted strings have $tags like this$ ".into(), tag: None, @@ -3067,9 +2951,7 @@ 
mod tests { let expected = vec![ Token::make_word("a", None), - Token::Whitespace(Whitespace::Space), Token::make_keyword("IS"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("NULL"), ]; @@ -3083,10 +2965,6 @@ mod tests { String::from("0--this is a comment\n1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\n".to_string(), - }), Token::Number("1".to_string(), false), ], ), @@ -3094,20 +2972,12 @@ mod tests { String::from("0--this is a comment\r1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\r1".to_string(), - }), ], ), ( String::from("0--this is a comment\r\n1"), vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment\r\n".to_string(), - }), Token::Number("1".to_string(), false), ], ), @@ -3129,10 +2999,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "\r".to_string(), - }), Token::Number("0".to_string(), false), ]; compare(expected, tokens); @@ -3144,11 +3010,7 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "this is a comment".to_string(), - })]; - compare(expected, tokens); + assert!(tokens.is_empty()); } #[test] @@ -3159,9 +3021,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* /comment".to_string(), - )), Token::Number("1".to_string(), false), ]; compare(expected, tokens); @@ -3173,10 +3032,6 @@ mod tests { "0/*multi-line\n* \n/* comment \n /*comment*/*/ */ /comment*/1", vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment*/*/ ".into(), - )), - Token::Whitespace(Whitespace::Space), Token::Div, Token::Word(Word { value: "comment".to_string(), @@ -3193,9 +3048,6 @@ mod tests { "0/*multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/*/1", vec![ Token::Number("0".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment( - "multi-line\n* \n/* comment \n /*comment/**/ */ /comment*/".into(), - )), Token::Number("1".to_string(), false), ], ); @@ -3204,9 +3056,7 @@ mod tests { "SELECT 1/* a /* b */ c */0", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment(" a /* b */ c ".to_string())), Token::Number("0".to_string(), false), ], ); @@ -3218,9 +3068,7 @@ mod tests { "select 1/*/**/*/0", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - Token::Whitespace(Whitespace::MultiLineComment("/**/".to_string())), Token::Number("0".to_string(), false), ], ); @@ -3232,11 +3080,7 @@ mod tests { "SELECT 1/*/* nested comment */*/0", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Number("1".to_string(), false), - 
Token::Whitespace(Whitespace::MultiLineComment( - "/* nested comment ".to_string(), - )), Token::Mul, Token::Div, Token::Number("0".to_string(), false), @@ -3250,12 +3094,7 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![ - Token::Whitespace(Whitespace::Newline), - Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())), - Token::Whitespace(Whitespace::Newline), - ]; - compare(expected, tokens); + assert!(tokens.is_empty()); } #[test] @@ -3264,12 +3103,7 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); - let expected = vec![ - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::Newline), - ]; - compare(expected, tokens); + assert!(tokens.is_empty()); } #[test] @@ -3295,13 +3129,9 @@ mod tests { let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); let expected = vec![ Token::make_word("line1", None), - Token::Whitespace(Whitespace::Newline), Token::make_word("line2", None), - Token::Whitespace(Whitespace::Newline), Token::make_word("line3", None), - Token::Whitespace(Whitespace::Newline), Token::make_word("line4", None), - Token::Whitespace(Whitespace::Newline), ]; compare(expected, tokens); } @@ -3313,15 +3143,10 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("TOP"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("5"), false), - Token::Whitespace(Whitespace::Space), Token::make_word("bar", Some('[')), - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("foo", None), ]; compare(expected, tokens); @@ -3334,32 +3159,20 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::Tilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::TildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkTilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkTildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("^a".into()), ]; compare(expected, tokens); @@ -3372,32 +3185,20 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::DoubleTilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::DoubleTildeAsterisk, - Token::Whitespace(Whitespace::Space), 
Token::SingleQuotedString("_a%".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkDoubleTilde, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), Token::Comma, - Token::Whitespace(Whitespace::Space), Token::make_word("col", None), - Token::Whitespace(Whitespace::Space), Token::ExclamationMarkDoubleTildeAsterisk, - Token::Whitespace(Whitespace::Space), Token::SingleQuotedString("_a%".into()), ]; compare(expected, tokens); @@ -3409,13 +3210,9 @@ mod tests { let dialect = GenericDialect {}; let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a " b"#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a ""#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"c """#, Some('"')), - Token::Whitespace(Whitespace::Space), ]; compare(expected, tokens); } @@ -3442,13 +3239,9 @@ mod tests { .tokenize() .unwrap(); let expected = vec![ - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a "" b"#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"a """#, Some('"')), - Token::Whitespace(Whitespace::Space), Token::make_word(r#"c """""#, Some('"')), - Token::Whitespace(Whitespace::Space), ]; compare(expected, tokens); } @@ -3462,23 +3255,8 @@ mod tests { .unwrap(); let expected = vec![ TokenWithSpan::at(Token::make_keyword("SELECT"), (1, 1).into(), (1, 7).into()), - TokenWithSpan::at( - Token::Whitespace(Whitespace::Space), - (1, 7).into(), - (1, 8).into(), - ), TokenWithSpan::at(Token::make_word("a", None), (1, 8).into(), (1, 9).into()), TokenWithSpan::at(Token::Comma, (1, 9).into(), (1, 10).into()), - TokenWithSpan::at( - Token::Whitespace(Whitespace::Newline), - (1, 10).into(), - (2, 1).into(), - ), - TokenWithSpan::at( - Token::Whitespace(Whitespace::Space), - (2, 1).into(), - (2, 2).into(), - ), TokenWithSpan::at(Token::make_word("b", None), (2, 2).into(), (2, 3).into()), ]; compare(expected, tokens); @@ -3600,11 +3378,8 @@ mod tests { let tokens = Tokenizer::new(dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Mul, - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -3802,9 +3577,7 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("CREATE"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("USER"), - Token::Whitespace(Whitespace::Space), Token::make_word("root", Some('`')), Token::AtSign, Token::make_word("%", Some('`')), @@ -3820,7 +3593,6 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::AtSign, Token::SingleQuotedString("1".to_string()), ]; @@ -3835,12 +3607,9 @@ mod tests { let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::AtSign, Token::DoubleQuotedString("bar".to_string()), - Token::Whitespace(Whitespace::Space), Token::make_keyword("FROM"), - Token::Whitespace(Whitespace::Space), Token::make_word("foo", None), ]; compare(expected, tokens); @@ -3853,7 +3622,6 @@ mod tests 
{ "select n'''''\\'", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::NationalStringLiteral("''\\".to_string()), ], ); @@ -3866,7 +3634,6 @@ mod tests { "select n'''''\\''", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::NationalStringLiteral("'''".to_string()), ], ); @@ -3878,7 +3645,6 @@ mod tests { "select e'...'", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::make_word("e", None), Token::SingleQuotedString("...".to_string()), ], @@ -3888,7 +3654,6 @@ mod tests { "select E'...'", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::make_word("E", None), Token::SingleQuotedString("...".to_string()), ], @@ -3901,7 +3666,6 @@ mod tests { "select e'\\''", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::EscapedStringLiteral("'".to_string()), ], ); @@ -3910,7 +3674,6 @@ mod tests { "select E'\\''", vec![ Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), Token::EscapedStringLiteral("'".to_string()), ], ); @@ -3923,7 +3686,6 @@ mod tests { "SELECT --'abc'", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Minus, Token::Minus, Token::SingleQuotedString("abc".to_string()), @@ -3935,11 +3697,6 @@ mod tests { "SELECT -- 'abc'", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc'".to_string(), - }), ], ); @@ -3948,7 +3705,6 @@ mod tests { "SELECT --", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Minus, Token::Minus, ], @@ -3962,11 +3718,6 @@ mod tests { "SELECT --'abc'", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "'abc'".to_string(), - }), ], ); @@ -3975,11 +3726,6 @@ mod tests { "SELECT -- 'abc'", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: " 'abc'".to_string(), - }), ], ); @@ -3988,11 +3734,6 @@ mod tests { "SELECT --", vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "--".to_string(), - comment: "".to_string(), - }), ], ); } @@ -4033,7 +3774,6 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), - Token::Whitespace(Whitespace::Space), Token::Word(Word { value: "table".to_string(), quote_style: None, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e7a128343..638b4aca3 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -563,12 +563,7 @@ fn test_snowflake_single_line_tokenize() { let expected = vec![ Token::make_keyword("CREATE"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("TABLE"), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "#".to_string(), - comment: " this is a comment \n".to_string(), - }), Token::make_word("table_1", None), ]; @@ -579,13 +574,7 @@ fn test_snowflake_single_line_tokenize() { let expected = vec![ Token::make_keyword("CREATE"), - Token::Whitespace(Whitespace::Space), Token::make_keyword("TABLE"), - Token::Whitespace(Whitespace::Space), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "//".to_string(), - comment: " this is a comment 
\n".to_string(), - }), Token::make_word("table_1", None), ]; From c75f11bf478a2911fb1ad0ffe70b5aeda8e72bef Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 28 Oct 2025 16:26:35 +0100 Subject: [PATCH 02/10] Extended placeholder syntax test and moved check in tokenizer --- src/parser/mod.rs | 2 +- src/tokenizer.rs | 26 ++++++++++++++++++++++++-- tests/sqlparser_bigquery.rs | 5 ++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ab981f9f5..a51781fbc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -18475,7 +18475,7 @@ mod tests { #[test] fn test_placeholder_invalid_whitespace() { - for w in [" ", "/*invalid*/"] { + for w in [" ", " ", "/*invalid*/", "\n", "\t", "\r\n", "--comment\n"] { let sql = format!("\nSELECT\n :{w}fooBar"); assert!(Parser::parse_sql(&GenericDialect, &sql).is_err()); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 451545157..1dffb8c58 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -908,6 +908,22 @@ impl<'a> Tokenizer<'a> { Ok(Some(Token::make_word(&word, None))) } + /// Returns a standardized error if the previous token is a `:` and + /// the method is expected to be called when a space is found after it. + fn handle_colon_space_error( + &self, + chars: &State, + prev_token: Option<&Token>, + ) -> Result, TokenizerError> { + if let Some(Token::Colon) = prev_token { + return Err(TokenizerError { + message: "Unexpected whitespace after ':'; did you mean ':placeholder' or '::'?".to_string(), + location: chars.location(), + }); + } + Ok(None) + } + /// Get the next token or return None fn next_token( &self, @@ -919,6 +935,7 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some(&ch) => match ch { ' ' | '\t' | '\n' | '\r' => { + self.handle_colon_space_error(chars, prev_token)?; chars.next(); // consume *location = chars.location(); self.next_token(location, chars, prev_token, true) @@ -1166,7 +1183,7 @@ impl<'a> Tokenizer<'a> { // if the prev token is not a word, then this is not a valid sql // word or number. if ch == '.' && chars.peekable.clone().nth(1) == Some('_') { - if let Some(Token::Word(_)) = prev_token { + if !preceded_by_whitespace { chars.next(); return Ok(Some(Token::Period)); } @@ -1210,7 +1227,7 @@ impl<'a> Tokenizer<'a> { // we should yield the dot as a dedicated token so compound identifiers // starting with digits can be parsed correctly. if s == "." 
&& self.dialect.supports_numeric_prefix() { - if let Some(Token::Word(_)) = prev_token { + if !preceded_by_whitespace { return Ok(Some(Token::Period)); } } @@ -1300,6 +1317,7 @@ impl<'a> Tokenizer<'a> { } if is_comment { + self.handle_colon_space_error(chars, prev_token)?; chars.next(); // consume second '-' // Consume the rest of the line as comment let _comment = self.tokenize_single_line_comment(chars); @@ -1324,12 +1342,14 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume the '/' match chars.peek() { Some('*') => { + self.handle_colon_space_error(chars, prev_token)?; chars.next(); // consume the '*', starting a multi-line comment let _comment = self.consume_multiline_comment(chars)?; *location = chars.location(); self.next_token(location, chars, prev_token, true) } Some('/') if dialect_of!(self is SnowflakeDialect) => { + self.handle_colon_space_error(chars, prev_token)?; chars.next(); // consume the second '/', starting a snowflake single-line comment // Consume the rest of the line as comment let _comment = self.tokenize_single_line_comment(chars); @@ -1534,6 +1554,7 @@ impl<'a> Tokenizer<'a> { '}' => self.consume_and_return(chars, Token::RBrace), '#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) => { + self.handle_colon_space_error(chars, prev_token)?; chars.next(); // consume the '#', starting a snowflake single-line comment // Consume the rest of the line as comment let _comment = self.tokenize_single_line_comment(chars); @@ -1668,6 +1689,7 @@ impl<'a> Tokenizer<'a> { // whitespace check (including unicode chars) should be last as it covers some of the chars above ch if ch.is_whitespace() => { + self.handle_colon_space_error(chars, prev_token)?; chars.next(); // consume *location = chars.location(); self.next_token(location, chars, prev_token, true) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 03a0ac813..9f1e72aae 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1567,7 +1567,10 @@ fn parse_table_identifiers() { fn test_table_ident_err(ident: &str) { let sql = format!("SELECT 1 FROM {ident}"); - assert!(bigquery().parse_sql_statements(&sql).is_err()); + assert!( + bigquery().parse_sql_statements(&sql).is_err(), + "Expected error parsing identifier: `{ident}`, within SQL: `{sql}`" + ); } test_table_ident("`spa ce`", None, vec![Ident::with_quote('`', "spa ce")]); From 1b8d716182009fbdc5891c1c44bc6c2e22598018 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 28 Oct 2025 16:36:51 +0100 Subject: [PATCH 03/10] Made `test_table_ident_err` more verbose --- tests/sqlparser_bigquery.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 9f1e72aae..c29b98da6 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1567,9 +1567,10 @@ fn parse_table_identifiers() { fn test_table_ident_err(ident: &str) { let sql = format!("SELECT 1 FROM {ident}"); + let parsed = bigquery().parse_sql_statements(&sql); assert!( - bigquery().parse_sql_statements(&sql).is_err(), - "Expected error parsing identifier: `{ident}`, within SQL: `{sql}`" + parsed.is_err(), + "Expected error parsing identifier: `{ident}`, within SQL: `{sql}` - but got success: {parsed:#?}" ); } From b862dc7eab00b45913cf1af742a7d3b53ab95998 Mon Sep 17 00:00:00 2001 From: Luca Date: Tue, 28 Oct 2025 22:56:31 +0100 Subject: [PATCH 04/10] Added handling of CSVs in COPY STDIN --- Cargo.toml | 1 + src/ast/mod.rs | 78 ++++++++++-- 
src/dialect/bigquery.rs | 2 +- src/dialect/snowflake.rs | 6 +- src/parser/mod.rs | 249 ++++++++++++++++++++++++--------------- src/tokenizer.rs | 174 +++++++++++++++++---------- 6 files changed, 339 insertions(+), 171 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ed94bbbdd..005cb4567 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ visitor = ["sqlparser_derive"] [dependencies] bigdecimal = { version = "0.4.1", features = ["serde"], optional = true } log = "0.4" +csv = "1.4.0" recursive = { version = "0.1.1", optional = true} serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 176d36545..184560a96 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -3227,7 +3227,7 @@ pub enum Statement { /// WITH options (before PostgreSQL version 9.0) legacy_options: Vec, /// VALUES a vector of values to be copied - values: Vec>, + values: Vec>>, }, /// ```sql /// COPY INTO | @@ -4579,18 +4579,76 @@ impl fmt::Display for Statement { if !legacy_options.is_empty() { write!(f, " {}", display_separated(legacy_options, " "))?; } + + let mut null_symbol = "\\N"; + let mut writer_builder = csv::WriterBuilder::new(); + + // Apply options + for option in options { + match option { + CopyOption::Delimiter(c) => { + writer_builder.delimiter(*c as u8); + } + CopyOption::Quote(c) => { + writer_builder.quote(*c as u8); + } + CopyOption::Escape(c) => { + writer_builder.escape(*c as u8); + } + CopyOption::Null(null) => { + null_symbol = null; + } + _ => {} + } + } + + // Apply legacy options + for option in legacy_options { + match option { + CopyLegacyOption::Delimiter(c) => { + writer_builder.delimiter(*c as u8); + } + CopyLegacyOption::Header => { + writer_builder.has_headers(true); + } + CopyLegacyOption::Null(null) => { + null_symbol = null; + } + CopyLegacyOption::Csv(csv_options) => { + for csv_option in csv_options { + match csv_option { + CopyLegacyCsvOption::Header => { + writer_builder.has_headers(true); + } + CopyLegacyCsvOption::Quote(c) => { + writer_builder.quote(*c as u8); + } + CopyLegacyCsvOption::Escape(c) => { + writer_builder.escape(*c as u8); + } + _ => {} + } + } + } + _ => {} + } + } + if !values.is_empty() { writeln!(f, ";")?; - let mut delim = ""; - for v in values { - write!(f, "{delim}")?; - delim = "\t"; - if let Some(v) = v { - write!(f, "{v}")?; - } else { - write!(f, "\\N")?; - } + let mut writer = writer_builder.from_writer(vec![]); + for row in values { + writer + .write_record( + row.iter() + .map(|column| column.as_deref().unwrap_or(null_symbol)), + ) + .map_err(|_| fmt::Error)? } + writer.flush().map_err(|_| fmt::Error)?; + let data = String::from_utf8(writer.into_inner().map_err(|_| fmt::Error)?) 
+ .map_err(|_| fmt::Error)?; + write!(f, "{}", data)?; write!(f, "\n\\.")?; } Ok(()) diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 27fd3cca3..78b830fc9 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -83,7 +83,7 @@ impl Dialect for BigQueryDialect { } fn is_identifier_part(&self, ch: char) -> bool { - ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_' + ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_' || ch == '-' } /// See [doc](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 6cb344fac..6b40125e3 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -1049,9 +1049,9 @@ pub fn parse_create_stage( pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result { let mut ident = String::new(); - while let Some(next_token) = parser.next_token_no_skip() { - match &next_token.token { - Token::SemiColon => break, + loop { + match &parser.next_token().token { + Token::SemiColon | Token::EOF => break, Token::Period => { parser.prev_token(); break; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a51781fbc..42dc758fb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4071,23 +4071,6 @@ impl<'a> Parser<'a> { } } - /// Return the first token, possibly whitespace, that has not yet been processed - /// (or None if reached end-of-file). - pub fn peek_token_no_skip(&self) -> TokenWithSpan { - self.peek_nth_token_no_skip(0) - } - - /// Return nth token, possibly whitespace, that has not yet been processed. - pub fn peek_nth_token_no_skip(&self, n: usize) -> TokenWithSpan { - self.tokens - .get(self.index + n) - .cloned() - .unwrap_or(TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }) - } - /// Return true if the next tokens exactly `expected` /// /// Does not advance the current token. @@ -4115,12 +4098,6 @@ impl<'a> Parser<'a> { self.index.saturating_sub(1) } - /// Return the next unprocessed token, possibly whitespace. - pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> { - self.index += 1; - self.tokens.get(self.index - 1) - } - /// Advances the current token to the next non-whitespace token /// /// See [`Self::get_current_token`] to get the current token after advancing @@ -9556,6 +9533,101 @@ impl<'a> Parser<'a> { } } + pub fn parse_csv_body( + &mut self, + options: &[CopyOption], + legacy_options: &[CopyLegacyOption], + ) -> Result>>, ParserError> { + let Token::CopyFromStdin(body) = self.next_token().token else { + return self.expected( + "COPY ... 
FROM STDIN with CSV body", + self.peek_token(), + ); + }; + + let mut reader_builder = csv::ReaderBuilder::new(); + + let mut null_symbol = "\\N"; + + // Apply options + for option in options { + match option { + CopyOption::Delimiter(c) => { + reader_builder.delimiter(*c as u8); + } + CopyOption::Header(has_header) => { + reader_builder.has_headers(*has_header); + } + CopyOption::Quote(c) => { + reader_builder.quote(*c as u8); + } + CopyOption::Escape(c) => { + reader_builder.escape(Some(*c as u8)); + } + CopyOption::Null(null) => { + null_symbol = null; + } + _ => {} + } + } + + // Apply legacy options + for option in legacy_options { + match option { + CopyLegacyOption::Delimiter(c) => { + reader_builder.delimiter(*c as u8); + } + CopyLegacyOption::Header => { + reader_builder.has_headers(true); + } + CopyLegacyOption::Null(null) => { + null_symbol = null; + } + CopyLegacyOption::Csv(csv_options) => { + for csv_option in csv_options { + match csv_option { + CopyLegacyCsvOption::Header => { + reader_builder.has_headers(true); + } + CopyLegacyCsvOption::Quote(c) => { + reader_builder.quote(*c as u8); + } + CopyLegacyCsvOption::Escape(c) => { + reader_builder.escape(Some(*c as u8)); + } + _ => {} + } + } + } + _ => {} + } + } + + let mut result = vec![]; + let mut reader = reader_builder.from_reader(body.as_bytes()); + for record in reader.records() { + let record = match record { + Ok(rec) => rec, + Err(e) => { + return Err(ParserError::ParserError(format!( + "Error parsing CSV data: {}", + e + ))) + } + }; + let mut row = vec![]; + for field in record.iter() { + if field == null_symbol { + row.push(None); + } else { + row.push(Some(field.to_string())); + } + } + result.push(row); + } + Ok(result) + } + /// Parse a copy statement pub fn parse_copy(&mut self) -> Result { let source; @@ -9609,7 +9681,7 @@ impl<'a> Parser<'a> { } let values = if let CopyTarget::Stdin = target { self.expect_token(&Token::SemiColon)?; - self.parse_tsv() + self.parse_csv_body(&options, &legacy_options)? } else { vec![] }; @@ -9947,35 +10019,6 @@ impl<'a> Parser<'a> { Ok(s.chars().next().unwrap()) } - /// Parse a tab separated values in - /// COPY payload - pub fn parse_tsv(&mut self) -> Vec> { - self.parse_tab_value() - } - - pub fn parse_tab_value(&mut self) -> Vec> { - let mut values = vec![]; - let mut content = String::from(""); - while let Some(t) = self.next_token_no_skip().map(|t| &t.token) { - match t { - Token::Backslash => { - if self.consume_token(&Token::Period) { - return values; - } - if let Token::Word(w) = self.next_token().token { - if w.value == "N" { - values.push(None); - } - } - } - _ => { - content.push_str(&t.to_string()); - } - } - } - values - } - /// Parse a literal value (numbers, strings, date/time, booleans) pub fn parse_value(&mut self) -> Result { let next_token = self.next_token(); @@ -10069,7 +10112,7 @@ impl<'a> Parser<'a> { // 2. Not calling self.next_token() to enforce `tok` // be followed immediately by a word/number, ie. // without any whitespace in between - let next_token = self.next_token_no_skip().unwrap_or(&EOF_TOKEN).clone(); + let next_token = self.next_token(); let ident = match next_token.token { Token::Word(w) => Ok(w.into_ident(next_token.span)), Token::Number(w, false) => Ok(Ident::with_span(next_token.span, w)), @@ -11293,54 +11336,66 @@ impl<'a> Parser<'a> { /// Return a tuple of the identifier and a boolean indicating it ends with a period. 
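// parse_csv_body above delegates the actual record splitting to the csv crate
// added to Cargo.toml in this commit. A small usage sketch of the same
// ReaderBuilder calls; the input literal and the "\N" null marker are
// illustrative, while the builder methods are the csv crate's actual API.
//
//     // Configure the reader the way parse_csv_body does from COPY options.
//     let mut reader = csv::ReaderBuilder::new()
//         .has_headers(false)
//         .delimiter(b',')
//         .from_reader("1,\\N,foo\n".as_bytes());
//
//     // Turn the null marker into None, as parse_csv_body does per field.
//     let row: Vec<Option<String>> = reader
//         .records()
//         .next()
//         .unwrap()
//         .unwrap()
//         .iter()
//         .map(|field| (field != "\\N").then(|| field.to_string()))
//         .collect();
//     assert_eq!(row, vec![Some("1".into()), None, Some("foo".into())]);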
fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> { match self.peek_token().token { + Token::UnquotedDashStringLiteral(lit) => { + let span = self.next_token().span; + Ok(( + Ident { + value: lit, + quote_style: None, + span, + }, + false, + )) + } Token::Word(w) => { let quote_style_is_none = w.quote_style.is_none(); let mut requires_whitespace = false; let mut ident = w.into_ident(self.next_token().span); if quote_style_is_none { - while matches!(self.peek_token_no_skip().token, Token::Minus) { - self.next_token(); - ident.value.push('-'); - - let token = self - .next_token_no_skip() - .cloned() - .unwrap_or(TokenWithSpan::wrap(Token::EOF)); - requires_whitespace = match token.token { - Token::Word(next_word) if next_word.quote_style.is_none() => { - ident.value.push_str(&next_word.value); - false - } - Token::Number(s, false) => { - // A number token can represent a decimal value ending with a period, e.g., `Number('123.')`. - // However, for an [ObjectName], it is part of a hyphenated identifier, e.g., `foo-123.bar`. - // - // If a number token is followed by a period, it is part of an [ObjectName]. - // Return the identifier with `true` if the number token is followed by a period, indicating that - // parsing should continue for the next part of the hyphenated identifier. - if s.ends_with('.') { - let Some(s) = s.split('.').next().filter(|s| { - !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()) - }) else { - return self.expected( - "continuation of hyphenated identifier", - TokenWithSpan::new(Token::Number(s, false), token.span), - ); - }; - ident.value.push_str(s); - return Ok((ident, true)); - } else { - ident.value.push_str(&s); - } - // If next token is period, then it is part of an ObjectName and we don't expect whitespace - // after the number. - !matches!(self.peek_token().token, Token::Period) - } - _ => { - return self - .expected("continuation of hyphenated identifier", token); - } - } + while matches!(self.peek_token().token, Token::Minus) { + unreachable!("Something went wrong in the tokenizer!"); + // self.next_token(); + // ident.value.push('-'); + + // let token = self + // .next_token_no_skip() + // .cloned() + // .unwrap_or(TokenWithSpan::wrap(Token::EOF)); + // requires_whitespace = match token.token { + // Token::Word(next_word) if next_word.quote_style.is_none() => { + // ident.value.push_str(&next_word.value); + // false + // } + // Token::Number(s, false) => { + // // A number token can represent a decimal value ending with a period, e.g., `Number('123.')`. + // // However, for an [ObjectName], it is part of a hyphenated identifier, e.g., `foo-123.bar`. + // // + // // If a number token is followed by a period, it is part of an [ObjectName]. + // // Return the identifier with `true` if the number token is followed by a period, indicating that + // // parsing should continue for the next part of the hyphenated identifier. + // if s.ends_with('.') { + // let Some(s) = s.split('.').next().filter(|s| { + // !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()) + // }) else { + // return self.expected( + // "continuation of hyphenated identifier", + // TokenWithSpan::new(Token::Number(s, false), token.span), + // ); + // }; + // ident.value.push_str(s); + // return Ok((ident, true)); + // } else { + // ident.value.push_str(&s); + // } + // // If next token is period, then it is part of an ObjectName and we don't expect whitespace + // // after the number. 
+ // !matches!(self.peek_token().token, Token::Period) + // } + // _ => { + // return self + // .expected("continuation of hyphenated identifier", token); + // } + // } } // If the last segment was a number, we must check that it's followed by whitespace, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1dffb8c58..82415e056 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -96,6 +96,11 @@ pub enum Token { /// Triple double quoted literal with raw string prefix. Example `R"""abc"""` /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) TripleDoubleQuotedRawStringLiteral(String), + /// An unquoted string literal containing dashes, i.e: 'first-second', + /// which is allowed in some BigQuery contexts + UnquotedDashStringLiteral(String), + /// A CSV body from a `COPY ... FROM STDIN` statement + CopyFromStdin(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' @@ -301,6 +306,8 @@ impl fmt::Display for Token { Token::DoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"{s}\""), Token::TripleSingleQuotedRawStringLiteral(ref s) => write!(f, "R'''{s}'''"), Token::TripleDoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"\"\"{s}\"\"\""), + Token::UnquotedDashStringLiteral(ref s) => write!(f, "{s}"), + Token::CopyFromStdin(ref s) => write!(f, "{s}\\."), Token::Comma => f.write_str(","), Token::DoubleEq => f.write_str("=="), Token::Spaceship => f.write_str("<=>"), @@ -387,14 +394,18 @@ impl fmt::Display for Token { } impl Token { - pub fn make_keyword(keyword: &str) -> Self { + pub fn make_keyword>(keyword: S) -> Self { Token::make_word(keyword, None) } - pub fn make_word(word: &str, quote_style: Option) -> Self { + pub fn make_word>(word: S, quote_style: Option) -> Self { + let word = word.into(); + if quote_style.is_none() && word.contains('-') { + return Token::UnquotedDashStringLiteral(word); + } let word_uppercase = word.to_uppercase(); Token::Word(Word { - value: word.to_string(), + value: word, quote_style, keyword: if quote_style.is_none() { let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str()); @@ -777,6 +788,62 @@ struct TokenizeQuotedStringSettings { backslash_escape: bool, } +#[derive(Clone, Copy, Default)] +/// Helper struct to handle the logic of the `COPY ... FROM STDIN` statement +/// which may occur in PostgreSQL and some other dialects. +struct CopyStdinHandler { + previous_copy_token_found: bool, + previous_stdin_token_found: bool, + current_copy_token_found: bool, + current_stdin_token_found: bool, +} + +impl CopyStdinHandler { + /// Update the internal state based on the provided token. + fn update(&mut self, token: &Token) { + match token { + Token::Word(Word { + keyword: Keyword::COPY, + .. + }) => { + self.current_copy_token_found = true; + } + Token::Word(Word { + keyword: Keyword::STDIN, + .. + }) if self.current_copy_token_found => { + self.current_stdin_token_found = true; + } + Token::SemiColon => { + self.previous_copy_token_found = self.current_copy_token_found; + self.previous_stdin_token_found = self.current_stdin_token_found; + self.current_copy_token_found = false; + self.current_stdin_token_found = false; + } + _ => {} + } + } + + /// Returns whether the previous tokens indicated a `COPY ... FROM STDIN` statement. 
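// A hedged illustration of the stream this handler produces (token names are
// from this patch; exact spans omitted): tokenizing
//
//     COPY t FROM STDIN; 1,2
//     \.
//
// yields COPY, t, FROM, STDIN, and the semicolon as ordinary tokens, followed
// by a single CopyFromStdin token holding everything between the semicolon and
// the terminating \. as a raw string for parse_csv_body to split.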
+ fn is_in_copy_from_stdin(&self) -> bool { + self.previous_copy_token_found && self.previous_stdin_token_found + } + + /// Extracts the CSV string from the provided State. + fn extract_csv_string(&self, state: &mut State) -> Result { + let mut csv_string = String::new(); + let mut last_character_was_cr = false; + while let Some(ch) = state.next() { + if last_character_was_cr && ch == '\\' && state.peek() == Some(&'.') { + break; + } + last_character_was_cr = ch == '\n'; + csv_string.push(ch); + } + Ok(csv_string) + } +} + /// SQL Tokenizer pub struct Tokenizer<'a> { dialect: &'a dyn Dialect, @@ -870,14 +937,29 @@ impl<'a> Tokenizer<'a> { line: 1, col: 1, }; + let mut cs_handler = CopyStdinHandler::default(); let mut location = state.location(); - while let Some(token) = self.next_token(&mut location, &mut state, buf.last().map(|t| &t.token), false)? { + while let Some(token) = self.next_token( + &mut location, + &mut state, + buf.last().map(|t| &t.token), + false, + )? { let span = location.span_to(state.location()); - + cs_handler.update(&token); buf.push(TokenWithSpan { token, span }); - location = state.location(); + + if cs_handler.is_in_copy_from_stdin() { + let csv_string = cs_handler.extract_csv_string(&mut state)?; + let span = location.span_to(state.location()); + buf.push(TokenWithSpan { + token: Token::CopyFromStdin(csv_string), + span, + }); + location = state.location(); + } } Ok(()) } @@ -905,7 +987,7 @@ impl<'a> Tokenizer<'a> { return Ok(Some(Token::Number(s, false))); } - Ok(Some(Token::make_word(&word, None))) + Ok(Some(Token::make_word(word, None))) } /// Returns a standardized error if the previous token is a `:` and @@ -917,7 +999,8 @@ impl<'a> Tokenizer<'a> { ) -> Result, TokenizerError> { if let Some(Token::Colon) = prev_token { return Err(TokenizerError { - message: "Unexpected whitespace after ':'; did you mean ':placeholder' or '::'?".to_string(), + message: "Unexpected whitespace after ':'; did you mean ':placeholder' or '::'?" 
+ .to_string(), location: chars.location(), }); } @@ -939,7 +1022,7 @@ impl<'a> Tokenizer<'a> { chars.next(); // consume *location = chars.location(); self.next_token(location, chars, prev_token, true) - }, + } // BigQuery and MySQL use b or B for byte string literal, Postgres for bit strings b @ 'B' | b @ 'b' if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | MySqlDialect | GenericDialect) => { @@ -976,7 +1059,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "b" or "B" let s = self.tokenize_word(b, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word(s, None))) } } } @@ -1003,7 +1086,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "r" or "R" let s = self.tokenize_word(b, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word(s, None))) } } } @@ -1022,7 +1105,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "N" let s = self.tokenize_word(n, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word(s, None))) } } } @@ -1039,7 +1122,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "E" or "e" let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word(s, None))) } } } @@ -1058,7 +1141,7 @@ impl<'a> Tokenizer<'a> { } // regular identifier starting with an "U" or "u" let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word(s, None))) } // The spec only allows an uppercase 'X' to introduce a hex // string, but PostgreSQL, at least, allows a lowercase 'x' too. @@ -1073,7 +1156,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "X" let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word(s, None))) } } } @@ -1122,7 +1205,7 @@ impl<'a> Tokenizer<'a> { // delimited (quoted) identifier quote_start if self.dialect.is_delimited_identifier_start(ch) => { let word = self.tokenize_quoted_identifier(quote_start, chars)?; - Ok(Some(Token::make_word(&word, Some(quote_start)))) + Ok(Some(Token::make_word(word, Some(quote_start)))) } // Potentially nested delimited (quoted) identifier quote_start @@ -1146,7 +1229,7 @@ impl<'a> Tokenizer<'a> { let Some(nested_quote_start) = nested_quote_start else { let word = self.tokenize_quoted_identifier(quote_start, chars)?; - return Ok(Some(Token::make_word(&word, Some(quote_start)))); + return Ok(Some(Token::make_word(word, Some(quote_start)))); }; let mut word = vec![]; @@ -1174,7 +1257,7 @@ impl<'a> Tokenizer<'a> { } chars.next(); // skip close delimiter - Ok(Some(Token::make_word(&word.concat(), Some(quote_start)))) + Ok(Some(Token::make_word(word.concat(), Some(quote_start)))) } // numbers and period '0'..='9' | '.' => { @@ -1284,12 +1367,12 @@ impl<'a> Tokenizer<'a> { if !word.is_empty() { s += word.as_str(); - return Ok(Some(Token::make_word(s.as_str(), None))); + return Ok(Some(Token::make_word(s, None))); } } else if prev_token == Some(&Token::Period) { // If the previous token was a period, thus not belonging to a number, // the value we have is part of an identifier. 
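                // e.g. in `db.1e10` the `1e10` follows a Period token, so it
                // belongs to a compound identifier rather than being a number.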
-                        return Ok(Some(Token::make_word(s.as_str(), None)));
+                        return Ok(Some(Token::make_word(s, None)));
                     }
                 }

@@ -1319,7 +1402,7 @@ impl<'a> Tokenizer<'a> {
                         if is_comment {
                             self.handle_colon_space_error(chars, prev_token)?;
                             chars.next(); // consume second '-'
-                            // Consume the rest of the line as comment
+                                          // Consume the rest of the line as comment
                             let _comment = self.tokenize_single_line_comment(chars);
                             *location = chars.location();
                             return self.next_token(location, chars, prev_token, true);
@@ -1351,7 +1434,7 @@ impl<'a> Tokenizer<'a> {
                     Some('/') if dialect_of!(self is SnowflakeDialect) => {
                         self.handle_colon_space_error(chars, prev_token)?;
                         chars.next(); // consume the second '/', starting a snowflake single-line comment
-                        // Consume the rest of the line as comment
+                                      // Consume the rest of the line as comment
                         let _comment = self.tokenize_single_line_comment(chars);
                         *location = chars.location();
                         self.next_token(location, chars, prev_token, true)
@@ -1556,7 +1639,7 @@ impl<'a> Tokenizer<'a> {
                 {
                     self.handle_colon_space_error(chars, prev_token)?;
                     chars.next(); // consume the '#', starting a snowflake single-line comment
-                    // Consume the rest of the line as comment
+                                  // Consume the rest of the line as comment
                     let _comment = self.tokenize_single_line_comment(chars);
                     *location = chars.location();
                     self.next_token(location, chars, prev_token, true)
@@ -1871,7 +1954,7 @@ impl<'a> Tokenizer<'a> {
     fn tokenize_word(&self, first_chars: impl Into<String>, chars: &mut State) -> String {
         let mut s = first_chars.into();
         s.push_str(&peeking_take_while(chars, |ch| {
-            self.dialect.is_identifier_part(ch)
+            self.dialect.is_identifier_part(ch) || ch == '-' && self.dialect.is_identifier_part('-')
         }));
         s
     }
@@ -2703,10 +2786,7 @@ mod tests {
         let dialect = GenericDialect {};
         let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
         // println!("tokens: {:#?}", tokens);
-        let expected = vec![
-            Token::Char('💝'),
-            Token::make_word("مصطفىh", None),
-        ];
+        let expected = vec![Token::Char('💝'), Token::make_word("مصطفىh", None)];
         compare(expected, tokens);
     }

@@ -2992,9 +3072,7 @@ mod tests {
             ),
             (
                 String::from("0--this is a comment\r1"),
-                vec![
-                    Token::Number("0".to_string(), false),
-                ],
+                vec![Token::Number("0".to_string(), false)],
             ),
             (
                 String::from("0--this is a comment\r\n1"),
@@ -3715,49 +3793,25 @@ mod tests {
         );

         all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace())
-            .tokenizes_to(
-                "SELECT -- 'abc'",
-                vec![
-                    Token::make_keyword("SELECT"),
-                ],
-            );
+            .tokenizes_to("SELECT -- 'abc'", vec![Token::make_keyword("SELECT")]);

         all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace())
             .tokenizes_to(
                 "SELECT --",
-                vec![
-                    Token::make_keyword("SELECT"),
-                    Token::Minus,
-                    Token::Minus,
-                ],
+                vec![Token::make_keyword("SELECT"), Token::Minus, Token::Minus],
             );
     }

     #[test]
     fn test_whitespace_not_required_after_single_line_comment() {
         all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace())
-            .tokenizes_to(
-                "SELECT --'abc'",
-                vec![
-                    Token::make_keyword("SELECT"),
-                ],
-            );
+            .tokenizes_to("SELECT --'abc'", vec![Token::make_keyword("SELECT")]);

         all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace())
-            .tokenizes_to(
-                "SELECT -- 'abc'",
-                vec![
-                    Token::make_keyword("SELECT"),
-                ],
-            );
+            .tokenizes_to("SELECT -- 'abc'", vec![Token::make_keyword("SELECT")]);

         all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace())
-            .tokenizes_to(
-                "SELECT --",
-                vec![
-                    Token::make_keyword("SELECT"),
-                ],
-            );
+
.tokenizes_to("SELECT --", vec![Token::make_keyword("SELECT")]); } #[test] From 93ea5d2458566251225ab47f7a32a0e86059f9d3 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 29 Oct 2025 09:35:54 +0100 Subject: [PATCH 05/10] Extended CSV STDIN tests and resolved more corner cases in tokenizer --- src/ast/mod.rs | 2 +- src/dialect/bigquery.rs | 6 +- src/dialect/mod.rs | 5 + src/parser/mod.rs | 155 ++-- src/test_utils.rs | 1 + src/tokenizer.rs | 1387 ++++++++++++++++++----------------- tests/sqlparser_common.rs | 1 + tests/sqlparser_postgres.rs | 52 +- 8 files changed, 834 insertions(+), 775 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 184560a96..6ddf32819 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4649,7 +4649,7 @@ impl fmt::Display for Statement { let data = String::from_utf8(writer.into_inner().map_err(|_| fmt::Error)?) .map_err(|_| fmt::Error)?; write!(f, "{}", data)?; - write!(f, "\n\\.")?; + write!(f, "\\.")?; } Ok(()) } diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 78b830fc9..c8a50dd66 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -83,7 +83,11 @@ impl Dialect for BigQueryDialect { } fn is_identifier_part(&self, ch: char) -> bool { - ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_' || ch == '-' + ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_' + } + + fn supports_hyphenated_identifiers(&self) -> bool { + true } /// See [doc](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index ef4e1cdde..abc8291d7 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -178,6 +178,11 @@ pub trait Dialect: Debug + Any { /// Determine if a character is a valid unquoted identifier character fn is_identifier_part(&self, ch: char) -> bool; + /// Returns whether the dialect supports hyphenated identifiers + fn supports_hyphenated_identifiers(&self) -> bool { + false + } + /// Most dialects do not have custom operators. Override this method to provide custom operators. fn is_custom_operator_part(&self, _ch: char) -> bool { false diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 42dc758fb..90c52bb87 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9539,13 +9539,11 @@ impl<'a> Parser<'a> { legacy_options: &[CopyLegacyOption], ) -> Result>>, ParserError> { let Token::CopyFromStdin(body) = self.next_token().token else { - return self.expected( - "COPY ... FROM STDIN with CSV body", - self.peek_token(), - ); + return self.expected("COPY ... FROM STDIN with CSV body", self.peek_token()); }; let mut reader_builder = csv::ReaderBuilder::new(); + reader_builder.has_headers(false); let mut null_symbol = "\\N"; @@ -11336,80 +11334,69 @@ impl<'a> Parser<'a> { /// Return a tuple of the identifier and a boolean indicating it ends with a period. 
fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> { match self.peek_token().token { - Token::UnquotedDashStringLiteral(lit) => { - let span = self.next_token().span; - Ok(( - Ident { - value: lit, - quote_style: None, - span, - }, - false, - )) - } - Token::Word(w) => { - let quote_style_is_none = w.quote_style.is_none(); - let mut requires_whitespace = false; - let mut ident = w.into_ident(self.next_token().span); - if quote_style_is_none { - while matches!(self.peek_token().token, Token::Minus) { - unreachable!("Something went wrong in the tokenizer!"); - // self.next_token(); - // ident.value.push('-'); - - // let token = self - // .next_token_no_skip() - // .cloned() - // .unwrap_or(TokenWithSpan::wrap(Token::EOF)); - // requires_whitespace = match token.token { - // Token::Word(next_word) if next_word.quote_style.is_none() => { - // ident.value.push_str(&next_word.value); - // false - // } - // Token::Number(s, false) => { - // // A number token can represent a decimal value ending with a period, e.g., `Number('123.')`. - // // However, for an [ObjectName], it is part of a hyphenated identifier, e.g., `foo-123.bar`. - // // - // // If a number token is followed by a period, it is part of an [ObjectName]. - // // Return the identifier with `true` if the number token is followed by a period, indicating that - // // parsing should continue for the next part of the hyphenated identifier. - // if s.ends_with('.') { - // let Some(s) = s.split('.').next().filter(|s| { - // !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()) - // }) else { - // return self.expected( - // "continuation of hyphenated identifier", - // TokenWithSpan::new(Token::Number(s, false), token.span), - // ); - // }; - // ident.value.push_str(s); - // return Ok((ident, true)); - // } else { - // ident.value.push_str(&s); - // } - // // If next token is period, then it is part of an ObjectName and we don't expect whitespace - // // after the number. - // !matches!(self.peek_token().token, Token::Period) - // } - // _ => { - // return self - // .expected("continuation of hyphenated identifier", token); - // } - // } - } - - // If the last segment was a number, we must check that it's followed by whitespace, - // otherwise foo-123a will be parsed as `foo-123` with the alias `a`. - if requires_whitespace { - let token = self.next_token(); - if !matches!(token.token, Token::EOF) { - return self - .expected("whitespace following hyphenated identifier", token); - } - } - } - Ok((ident, false)) - } + // Token::Word(w) => { + // let quote_style_is_none = w.quote_style.is_none(); + // let mut requires_whitespace = false; + // let mut ident = w.into_ident(self.next_token().span); + // if quote_style_is_none { + // while matches!(self.peek_token().token, Token::Minus) { + // unreachable!("Something went wrong in the tokenizer!"); + // // self.next_token(); + // // ident.value.push('-'); + + // // let token = self + // // .next_token_no_skip() + // // .cloned() + // // .unwrap_or(TokenWithSpan::wrap(Token::EOF)); + // // requires_whitespace = match token.token { + // // Token::Word(next_word) if next_word.quote_style.is_none() => { + // // ident.value.push_str(&next_word.value); + // // false + // // } + // // Token::Number(s, false) => { + // // // A number token can represent a decimal value ending with a period, e.g., `Number('123.')`. + // // // However, for an [ObjectName], it is part of a hyphenated identifier, e.g., `foo-123.bar`. 
+            // //             //
+            // //             // If a number token is followed by a period, it is part of an [ObjectName].
+            // //             // Return the identifier with `true` if the number token is followed by a period, indicating that
+            // //             // parsing should continue for the next part of the hyphenated identifier.
+            // //             if s.ends_with('.') {
+            // //                 let Some(s) = s.split('.').next().filter(|s| {
+            // //                     !s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
+            // //                 }) else {
+            // //                     return self.expected(
+            // //                         "continuation of hyphenated identifier",
+            // //                         TokenWithSpan::new(Token::Number(s, false), token.span),
+            // //                     );
+            // //                 };
+            // //                 ident.value.push_str(s);
+            // //                 return Ok((ident, true));
+            // //             } else {
+            // //                 ident.value.push_str(&s);
+            // //             }
+            // //             // If next token is period, then it is part of an ObjectName and we don't expect whitespace
+            // //             // after the number.
+            // //             !matches!(self.peek_token().token, Token::Period)
+            // //         }
+            // //         _ => {
+            // //             return self
+            // //                 .expected("continuation of hyphenated identifier", token);
+            // //         }
+            // //     }
+            // }

+            // // If the last segment was a number, we must check that it's followed by whitespace,
+            // // otherwise foo-123a will be parsed as `foo-123` with the alias `a`.
+            // if requires_whitespace {
+            //     let token = self.next_token();
+            //     if !matches!(token.token, Token::EOF) {
+            //         return self
+            //             .expected("whitespace following hyphenated identifier", token);
+            //     }
+            // }
+            // }
+            // Ok((ident, false))
+            // }
             _ => Ok((self.parse_identifier()?, false)),
         }
     }
@@ -18530,9 +18517,17 @@ mod tests {

     #[test]
     fn test_placeholder_invalid_whitespace() {
-        for w in [" ", "  ", "/*invalid*/", "\n", "\t", "\r\n", "--comment\n"] {
+        for w in [
+            " ",
+            "/*invalid*/",
+            "\n",
+            "\t\t",
+            "\r\n",
+            "--comment\n",
+            "/* multi\nline\ncomment */",
+        ] {
             let sql = format!("\nSELECT\n  :{w}fooBar");
-            assert!(Parser::parse_sql(&GenericDialect, &sql).is_err());
+            assert!(Parser::parse_sql(&GenericDialect, &sql).is_err(), "Failed to error when inserting the whitespace {w:?} within the placeholder SQL: `{sql}`");
         }
     }
 }
diff --git a/src/test_utils.rs b/src/test_utils.rs
index a8c8afd59..978447d96 100644
--- a/src/test_utils.rs
+++ b/src/test_utils.rs
@@ -154,6 +154,7 @@ impl TestedDialects {
     ///
     /// For multiple statements, use [`statements_parse_to`].
     pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement {
+        println!("Testing SQL: {}", sql);
         let mut statements = self.parse_sql_statements(sql).expect(sql);
         assert_eq!(statements.len(), 1);
         if !canonical.is_empty() && sql != canonical {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 82415e056..f49468fe3 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -96,9 +96,6 @@ pub enum Token {
     /// Triple double quoted literal with raw string prefix. Example `R"""abc"""`
     /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals)
     TripleDoubleQuotedRawStringLiteral(String),
-    /// An unquoted string literal containing dashes, i.e: 'first-second',
-    /// which is allowed in some BigQuery contexts
-    UnquotedDashStringLiteral(String),
     /// A CSV body from a `COPY ...
FROM STDIN` statement
     CopyFromStdin(String),
     /// "National" string literal: i.e: N'string'
@@ -306,7 +303,6 @@ impl fmt::Display for Token {
             Token::DoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"{s}\""),
             Token::TripleSingleQuotedRawStringLiteral(ref s) => write!(f, "R'''{s}'''"),
             Token::TripleDoubleQuotedRawStringLiteral(ref s) => write!(f, "R\"\"\"{s}\"\"\""),
-            Token::UnquotedDashStringLiteral(ref s) => write!(f, "{s}"),
             Token::CopyFromStdin(ref s) => write!(f, "{s}\\."),
             Token::Comma => f.write_str(","),
             Token::DoubleEq => f.write_str("=="),
@@ -400,9 +396,6 @@ impl Token {

     pub fn make_word<S: Into<String>>(word: S, quote_style: Option<char>) -> Self {
         let word = word.into();
-        if quote_style.is_none() && word.contains('-') {
-            return Token::UnquotedDashStringLiteral(word);
-        }
         let word_uppercase = word.to_uppercase();
         Token::Word(Word {
             value: word,
@@ -835,6 +828,7 @@ impl CopyStdinHandler {
         let mut last_character_was_cr = false;
         while let Some(ch) = state.next() {
             if last_character_was_cr && ch == '\\' && state.peek() == Some(&'.') {
+                state.next(); // consume the '.'
                 break;
             }
             last_character_was_cr = ch == '\n';
@@ -937,6 +931,7 @@ impl<'a> Tokenizer<'a> {
             line: 1,
             col: 1,
         };
+        let mut prev_keyword = None;
         let mut cs_handler = CopyStdinHandler::default();

         let mut location = state.location();
@@ -944,8 +939,15 @@ impl<'a> Tokenizer<'a> {
             &mut location,
             &mut state,
             buf.last().map(|t| &t.token),
+            prev_keyword,
             false,
         )? {
+            if let Token::Word(Word { keyword, .. }) = &token {
+                if *keyword != Keyword::NoKeyword {
+                    prev_keyword = Some(*keyword);
+                }
+            }
+
             let span = location.span_to(state.location());
             cs_handler.update(&token);
             buf.push(TokenWithSpan { token, span });
@@ -969,10 +971,11 @@ impl<'a> Tokenizer<'a> {
         &self,
         ch: impl IntoIterator<Item = char>,
         chars: &mut State,
+        prev_keyword: Option<Keyword>,
     ) -> Result<Option<Token>, TokenizerError> {
         chars.next(); // consume the first char
         let ch: String = ch.into_iter().collect();
-        let word = self.tokenize_word(ch, chars);
+        let word = self.tokenize_word(ch, chars, prev_keyword)?;

         // TODO: implement parsing of exponent here
         if word.chars().all(|x| x.is_ascii_digit() || x == '.') {
@@ -996,7 +999,7 @@ impl<'a> Tokenizer<'a> {
         &self,
         chars: &State,
         prev_token: Option<&Token>,
-    ) -> Result<Option<Token>, TokenizerError> {
+        preceded_by_whitespace: bool,
+    ) -> Result<(), TokenizerError> {
+        if !preceded_by_whitespace {
+            return Ok(());
+        }
         if let Some(Token::Colon) = prev_token {
             return Err(TokenizerError {
                 message: "Unexpected whitespace after ':'; did you mean ':placeholder' or '::'?"
@@ -1004,7 +1011,7 @@ impl<'a> Tokenizer<'a> { location: chars.location(), }); } - Ok(None) + Ok(()) } /// Get the next token or return None @@ -1013,773 +1020,774 @@ impl<'a> Tokenizer<'a> { location: &mut Location, chars: &mut State, prev_token: Option<&Token>, + prev_keyword: Option, preceded_by_whitespace: bool, ) -> Result, TokenizerError> { - match chars.peek() { - Some(&ch) => match ch { - ' ' | '\t' | '\n' | '\r' => { - self.handle_colon_space_error(chars, prev_token)?; - chars.next(); // consume - *location = chars.location(); - self.next_token(location, chars, prev_token, true) - } - // BigQuery and MySQL use b or B for byte string literal, Postgres for bit strings - b @ 'B' | b @ 'b' if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | MySqlDialect | GenericDialect) => - { - chars.next(); // consume - match chars.peek() { - Some('\'') => { - if self.dialect.supports_triple_quoted_string() { - return self - .tokenize_single_or_triple_quoted_string:: Token>( - chars, - '\'', - false, - Token::SingleQuotedByteStringLiteral, - Token::TripleSingleQuotedByteStringLiteral, - ); - } - let s = self.tokenize_single_quoted_string(chars, '\'', false)?; - Ok(Some(Token::SingleQuotedByteStringLiteral(s))) - } - Some('\"') => { - if self.dialect.supports_triple_quoted_string() { - return self - .tokenize_single_or_triple_quoted_string:: Token>( - chars, - '"', - false, - Token::DoubleQuotedByteStringLiteral, - Token::TripleDoubleQuotedByteStringLiteral, - ); - } - let s = self.tokenize_single_quoted_string(chars, '\"', false)?; - Ok(Some(Token::DoubleQuotedByteStringLiteral(s))) - } - _ => { - // regular identifier starting with an "b" or "B" - let s = self.tokenize_word(b, chars); - Ok(Some(Token::make_word(s, None))) + let Some(&ch) = chars.peek() else { + return Ok(None); + }; + match ch { + ' ' | '\t' | '\n' | '\r' => { + self.handle_colon_space_error( + chars, + prev_token, + preceded_by_whitespace || ch == '\n', + )?; + chars.next(); // consume + *location = chars.location(); + self.next_token(location, chars, prev_token, prev_keyword, true) + } + // BigQuery and MySQL use b or B for byte string literal, Postgres for bit strings + b @ 'B' | b @ 'b' if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | MySqlDialect | GenericDialect) => + { + chars.next(); // consume + match chars.peek() { + Some('\'') => { + if self.dialect.supports_triple_quoted_string() { + return self + .tokenize_single_or_triple_quoted_string:: Token>( + chars, + '\'', + false, + Token::SingleQuotedByteStringLiteral, + Token::TripleSingleQuotedByteStringLiteral, + ); } + let s = self.tokenize_single_quoted_string(chars, '\'', false)?; + Ok(Some(Token::SingleQuotedByteStringLiteral(s))) } - } - // BigQuery uses r or R for raw string literal - b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => { - chars.next(); // consume - match chars.peek() { - Some('\'') => self - .tokenize_single_or_triple_quoted_string:: Token>( - chars, - '\'', - false, - Token::SingleQuotedRawStringLiteral, - Token::TripleSingleQuotedRawStringLiteral, - ), - Some('\"') => self - .tokenize_single_or_triple_quoted_string:: Token>( - chars, - '"', - false, - Token::DoubleQuotedRawStringLiteral, - Token::TripleDoubleQuotedRawStringLiteral, - ), - _ => { - // regular identifier starting with an "r" or "R" - let s = self.tokenize_word(b, chars); - Ok(Some(Token::make_word(s, None))) + Some('\"') => { + if self.dialect.supports_triple_quoted_string() { + return self + .tokenize_single_or_triple_quoted_string:: 
Token>( + chars, + '"', + false, + Token::DoubleQuotedByteStringLiteral, + Token::TripleDoubleQuotedByteStringLiteral, + ); } + let s = self.tokenize_single_quoted_string(chars, '\"', false)?; + Ok(Some(Token::DoubleQuotedByteStringLiteral(s))) + } + _ => { + // regular identifier starting with an "b" or "B" + let s = self.tokenize_word(b, chars, prev_keyword)?; + Ok(Some(Token::make_word(s, None))) } } - // Redshift uses lower case n for national string literal - n @ 'N' | n @ 'n' => { - chars.next(); // consume, to check the next char - match chars.peek() { - Some('\'') => { - // N'...' - a - let backslash_escape = - self.dialect.supports_string_literal_backslash_escape(); - let s = - self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; - Ok(Some(Token::NationalStringLiteral(s))) - } - _ => { - // regular identifier starting with an "N" - let s = self.tokenize_word(n, chars); - Ok(Some(Token::make_word(s, None))) - } + } + // BigQuery uses r or R for raw string literal + b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => { + chars.next(); // consume + match chars.peek() { + Some('\'') => self + .tokenize_single_or_triple_quoted_string:: Token>( + chars, + '\'', + false, + Token::SingleQuotedRawStringLiteral, + Token::TripleSingleQuotedRawStringLiteral, + ), + Some('\"') => self + .tokenize_single_or_triple_quoted_string:: Token>( + chars, + '"', + false, + Token::DoubleQuotedRawStringLiteral, + Token::TripleDoubleQuotedRawStringLiteral, + ), + _ => { + // regular identifier starting with an "r" or "R" + let s = self.tokenize_word(b, chars, prev_keyword)?; + Ok(Some(Token::make_word(s, None))) } } - // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. - x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { - let starting_loc = chars.location(); - chars.next(); // consume, to check the next char - match chars.peek() { - Some('\'') => { - let s = - self.tokenize_escaped_single_quoted_string(starting_loc, chars)?; - Ok(Some(Token::EscapedStringLiteral(s))) - } - _ => { - // regular identifier starting with an "E" or "e" - let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(s, None))) - } + } + // Redshift uses lower case n for national string literal + n @ 'N' | n @ 'n' => { + chars.next(); // consume, to check the next char + match chars.peek() { + Some('\'') => { + // N'...' 
- a + let backslash_escape = + self.dialect.supports_string_literal_backslash_escape(); + let s = + self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; + Ok(Some(Token::NationalStringLiteral(s))) + } + _ => { + // regular identifier starting with an "N" + let s = self.tokenize_word(n, chars, None)?; + Ok(Some(Token::make_word(s, None))) } } - // Unicode string literals like U&'first \000A second' are supported in some dialects, including PostgreSQL - x @ 'u' | x @ 'U' if self.dialect.supports_unicode_string_literal() => { - chars.next(); // consume, to check the next char - if chars.peek() == Some(&'&') { - // we cannot advance the iterator here, as we need to consume the '&' later if the 'u' was an identifier - let mut chars_clone = chars.peekable.clone(); - chars_clone.next(); // consume the '&' in the clone - if chars_clone.peek() == Some(&'\'') { - chars.next(); // consume the '&' in the original iterator - let s = unescape_unicode_single_quoted_string(chars)?; - return Ok(Some(Token::UnicodeStringLiteral(s))); - } + } + // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. + x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { + let starting_loc = chars.location(); + chars.next(); // consume, to check the next char + match chars.peek() { + Some('\'') => { + let s = self.tokenize_escaped_single_quoted_string(starting_loc, chars)?; + Ok(Some(Token::EscapedStringLiteral(s))) + } + _ => { + // regular identifier starting with an "E" or "e" + let s = self.tokenize_word(x, chars, prev_keyword)?; + Ok(Some(Token::make_word(s, None))) } - // regular identifier starting with an "U" or "u" - let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(s, None))) } - // The spec only allows an uppercase 'X' to introduce a hex - // string, but PostgreSQL, at least, allows a lowercase 'x' too. - x @ 'x' | x @ 'X' => { - chars.next(); // consume, to check the next char - match chars.peek() { - Some('\'') => { - // X'...' - a - let s = self.tokenize_single_quoted_string(chars, '\'', true)?; - Ok(Some(Token::HexStringLiteral(s))) - } - _ => { - // regular identifier starting with an "X" - let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(s, None))) - } + } + // Unicode string literals like U&'first \000A second' are supported in some dialects, including PostgreSQL + x @ 'u' | x @ 'U' if self.dialect.supports_unicode_string_literal() => { + chars.next(); // consume, to check the next char + if chars.peek() == Some(&'&') { + // we cannot advance the iterator here, as we need to consume the '&' later if the 'u' was an identifier + let mut chars_clone = chars.peekable.clone(); + chars_clone.next(); // consume the '&' in the clone + if chars_clone.peek() == Some(&'\'') { + chars.next(); // consume the '&' in the original iterator + let s = unescape_unicode_single_quoted_string(chars)?; + return Ok(Some(Token::UnicodeStringLiteral(s))); } } - // single quoted string - '\'' => { - if self.dialect.supports_triple_quoted_string() { - return self - .tokenize_single_or_triple_quoted_string:: Token>( - chars, - '\'', - self.dialect.supports_string_literal_backslash_escape(), - Token::SingleQuotedString, - Token::TripleSingleQuotedString, - ); + // regular identifier starting with an "U" or "u" + let s = self.tokenize_word(x, chars, prev_keyword)?; + Ok(Some(Token::make_word(s, None))) + } + // The spec only allows an uppercase 'X' to introduce a hex + // string, but PostgreSQL, at least, allows a lowercase 'x' too. 
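+            // e.g. both `X'1f'` and `x'1f'` tokenize to `Token::HexStringLiteral("1f")`.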
+ x @ 'x' | x @ 'X' => { + chars.next(); // consume, to check the next char + match chars.peek() { + Some('\'') => { + // X'...' - a + let s = self.tokenize_single_quoted_string(chars, '\'', true)?; + Ok(Some(Token::HexStringLiteral(s))) + } + _ => { + // regular identifier starting with an "X" + let s = self.tokenize_word(x, chars, prev_keyword)?; + Ok(Some(Token::make_word(s, None))) } - let s = self.tokenize_single_quoted_string( + } + } + // single quoted string + '\'' => { + if self.dialect.supports_triple_quoted_string() { + return self.tokenize_single_or_triple_quoted_string:: Token>( chars, '\'', self.dialect.supports_string_literal_backslash_escape(), - )?; - - Ok(Some(Token::SingleQuotedString(s))) + Token::SingleQuotedString, + Token::TripleSingleQuotedString, + ); } - // double quoted string - '\"' if !self.dialect.is_delimited_identifier_start(ch) - && !self.dialect.is_identifier_start(ch) => - { - if self.dialect.supports_triple_quoted_string() { - return self - .tokenize_single_or_triple_quoted_string:: Token>( - chars, - '"', - self.dialect.supports_string_literal_backslash_escape(), - Token::DoubleQuotedString, - Token::TripleDoubleQuotedString, - ); - } - let s = self.tokenize_single_quoted_string( + let s = self.tokenize_single_quoted_string( + chars, + '\'', + self.dialect.supports_string_literal_backslash_escape(), + )?; + + Ok(Some(Token::SingleQuotedString(s))) + } + // double quoted string + '\"' if !self.dialect.is_delimited_identifier_start(ch) + && !self.dialect.is_identifier_start(ch) => + { + if self.dialect.supports_triple_quoted_string() { + return self.tokenize_single_or_triple_quoted_string:: Token>( chars, '"', self.dialect.supports_string_literal_backslash_escape(), - )?; - - Ok(Some(Token::DoubleQuotedString(s))) - } - // delimited (quoted) identifier - quote_start if self.dialect.is_delimited_identifier_start(ch) => { - let word = self.tokenize_quoted_identifier(quote_start, chars)?; - Ok(Some(Token::make_word(word, Some(quote_start)))) + Token::DoubleQuotedString, + Token::TripleDoubleQuotedString, + ); } - // Potentially nested delimited (quoted) identifier - quote_start - if self - .dialect - .is_nested_delimited_identifier_start(quote_start) - && self - .dialect - .peek_nested_delimited_identifier_quotes(chars.peekable.clone()) - .is_some() => - { - let Some((quote_start, nested_quote_start)) = self + let s = self.tokenize_single_quoted_string( + chars, + '"', + self.dialect.supports_string_literal_backslash_escape(), + )?; + + Ok(Some(Token::DoubleQuotedString(s))) + } + // delimited (quoted) identifier + quote_start if self.dialect.is_delimited_identifier_start(ch) => { + let word = self.tokenize_quoted_identifier(quote_start, chars)?; + Ok(Some(Token::make_word(word, Some(quote_start)))) + } + // Potentially nested delimited (quoted) identifier + quote_start + if self + .dialect + .is_nested_delimited_identifier_start(quote_start) + && self .dialect .peek_nested_delimited_identifier_quotes(chars.peekable.clone()) - else { - return self.tokenizer_error( - chars.location(), - format!("Expected nested delimiter '{quote_start}' before EOF."), - ); - }; + .is_some() => + { + let Some((quote_start, nested_quote_start)) = self + .dialect + .peek_nested_delimited_identifier_quotes(chars.peekable.clone()) + else { + return self.tokenizer_error( + chars.location(), + format!("Expected nested delimiter '{quote_start}' before EOF."), + ); + }; - let Some(nested_quote_start) = nested_quote_start else { - let word = self.tokenize_quoted_identifier(quote_start, 
chars)?; - return Ok(Some(Token::make_word(word, Some(quote_start)))); - }; + let Some(nested_quote_start) = nested_quote_start else { + let word = self.tokenize_quoted_identifier(quote_start, chars)?; + return Ok(Some(Token::make_word(word, Some(quote_start)))); + }; - let mut word = vec![]; - let quote_end = Word::matching_end_quote(quote_start); - let nested_quote_end = Word::matching_end_quote(nested_quote_start); - let error_loc = chars.location(); + let mut word = vec![]; + let quote_end = Word::matching_end_quote(quote_start); + let nested_quote_end = Word::matching_end_quote(nested_quote_start); + let error_loc = chars.location(); + + chars.next(); // skip the first delimiter + peeking_take_while(chars, |ch| ch.is_whitespace()); + if chars.peek() != Some(&nested_quote_start) { + return self.tokenizer_error( + error_loc, + format!("Expected nested delimiter '{nested_quote_start}' before EOF."), + ); + } + word.push(nested_quote_start.into()); + word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?); + word.push(nested_quote_end.into()); + peeking_take_while(chars, |ch| ch.is_whitespace()); + if chars.peek() != Some("e_end) { + return self.tokenizer_error( + error_loc, + format!("Expected close delimiter '{quote_end}' before EOF."), + ); + } + chars.next(); // skip close delimiter - chars.next(); // skip the first delimiter - peeking_take_while(chars, |ch| ch.is_whitespace()); - if chars.peek() != Some(&nested_quote_start) { - return self.tokenizer_error( - error_loc, - format!("Expected nested delimiter '{nested_quote_start}' before EOF."), - ); - } - word.push(nested_quote_start.into()); - word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?); - word.push(nested_quote_end.into()); - peeking_take_while(chars, |ch| ch.is_whitespace()); - if chars.peek() != Some("e_end) { - return self.tokenizer_error( - error_loc, - format!("Expected close delimiter '{quote_end}' before EOF."), - ); + Ok(Some(Token::make_word(word.concat(), Some(quote_start)))) + } + // numbers and period + '0'..='9' | '.' => { + // special case where if ._ is encountered after a word then that word + // is a table and the _ is the start of the col name. + // if the prev token is not a word, then this is not a valid sql + // word or number. + if ch == '.' && chars.peekable.clone().nth(1) == Some('_') { + if !preceded_by_whitespace + && !matches!(prev_token, Some(Token::Plus | Token::Minus)) + { + chars.next(); + return Ok(Some(Token::Period)); } - chars.next(); // skip close delimiter - Ok(Some(Token::make_word(word.concat(), Some(quote_start)))) + return self + .tokenizer_error(chars.location(), "Unexpected character '_'".to_string()); } - // numbers and period - '0'..='9' | '.' => { - // special case where if ._ is encountered after a word then that word - // is a table and the _ is the start of the col name. - // if the prev token is not a word, then this is not a valid sql - // word or number. - if ch == '.' 
&& chars.peekable.clone().nth(1) == Some('_') { - if !preceded_by_whitespace { - chars.next(); - return Ok(Some(Token::Period)); - } - return self.tokenizer_error( - chars.location(), - "Unexpected character '_'".to_string(), - ); - } + // Some dialects support underscore as number separator + // There can only be one at a time and it must be followed by another digit + let is_number_separator = |ch: char, next_char: Option| { + self.dialect.supports_numeric_literal_underscores() + && ch == '_' + && next_char.is_some_and(|next_ch| next_ch.is_ascii_hexdigit()) + }; - // Some dialects support underscore as number separator - // There can only be one at a time and it must be followed by another digit - let is_number_separator = |ch: char, next_char: Option| { - self.dialect.supports_numeric_literal_underscores() - && ch == '_' - && next_char.is_some_and(|next_ch| next_ch.is_ascii_hexdigit()) - }; + let mut s = peeking_next_take_while(chars, |ch, next_ch| { + ch.is_ascii_digit() || is_number_separator(ch, next_ch) + }); - let mut s = peeking_next_take_while(chars, |ch, next_ch| { - ch.is_ascii_digit() || is_number_separator(ch, next_ch) + // match binary literal that starts with 0x + if s == "0" && chars.peek() == Some(&'x') { + chars.next(); + let s2 = peeking_next_take_while(chars, |ch, next_ch| { + ch.is_ascii_hexdigit() || is_number_separator(ch, next_ch) }); + return Ok(Some(Token::HexStringLiteral(s2))); + } - // match binary literal that starts with 0x - if s == "0" && chars.peek() == Some(&'x') { - chars.next(); - let s2 = peeking_next_take_while(chars, |ch, next_ch| { - ch.is_ascii_hexdigit() || is_number_separator(ch, next_ch) - }); - return Ok(Some(Token::HexStringLiteral(s2))); - } - - // match one period - if let Some('.') = chars.peek() { - s.push('.'); - chars.next(); - } - - // If the dialect supports identifiers that start with a numeric prefix - // and we have now consumed a dot, check if the previous token was a Word. - // If so, what follows is definitely not part of a decimal number and - // we should yield the dot as a dedicated token so compound identifiers - // starting with digits can be parsed correctly. - if s == "." && self.dialect.supports_numeric_prefix() { - if !preceded_by_whitespace { - return Ok(Some(Token::Period)); - } - } - - // Consume fractional digits. - s += &peeking_next_take_while(chars, |ch, next_ch| { - ch.is_ascii_digit() || is_number_separator(ch, next_ch) - }); + // match one period + if let Some('.') = chars.peek() { + s.push('.'); + chars.next(); + } - // No fraction -> Token::Period - if s == "." { + // If the dialect supports identifiers that start with a numeric prefix + // and we have now consumed a dot, check if the previous token was a Word. + // If so, what follows is definitely not part of a decimal number and + // we should yield the dot as a dedicated token so compound identifiers + // starting with digits can be parsed correctly. + if s == "." && self.dialect.supports_numeric_prefix() { + if !preceded_by_whitespace + && !matches!(prev_token, Some(Token::Plus | Token::Minus)) + { return Ok(Some(Token::Period)); } + } - // Parse exponent as number - let mut exponent_part = String::new(); - if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') { - let mut char_clone = chars.peekable.clone(); - exponent_part.push(char_clone.next().unwrap()); - - // Optional sign - match char_clone.peek() { - Some(&c) if matches!(c, '+' | '-') => { - exponent_part.push(c); - char_clone.next(); - } - _ => (), - } + // Consume fractional digits. 
+ s += &peeking_next_take_while(chars, |ch, next_ch| { + ch.is_ascii_digit() || is_number_separator(ch, next_ch) + }); - match char_clone.peek() { - // Definitely an exponent, get original iterator up to speed and use it - Some(&c) if c.is_ascii_digit() => { - for _ in 0..exponent_part.len() { - chars.next(); - } - exponent_part += - &peeking_take_while(chars, |ch| ch.is_ascii_digit()); - s += exponent_part.as_str(); - } - // Not an exponent, discard the work done - _ => (), + // No fraction -> Token::Period + if s == "." { + return Ok(Some(Token::Period)); + } + + // Parse exponent as number + let mut exponent_part = String::new(); + if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') { + let mut char_clone = chars.peekable.clone(); + exponent_part.push(char_clone.next().unwrap()); + + // Optional sign + match char_clone.peek() { + Some(&c) if matches!(c, '+' | '-') => { + exponent_part.push(c); + char_clone.next(); } + _ => (), } - // If the dialect supports identifiers that start with a numeric prefix, - // we need to check if the value is in fact an identifier and must thus - // be tokenized as a word. - if self.dialect.supports_numeric_prefix() { - if exponent_part.is_empty() { - // If it is not a number with an exponent, it may be - // an identifier starting with digits. - let word = - peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch)); - - if !word.is_empty() { - s += word.as_str(); - return Ok(Some(Token::make_word(s, None))); + match char_clone.peek() { + // Definitely an exponent, get original iterator up to speed and use it + Some(&c) if c.is_ascii_digit() => { + for _ in 0..exponent_part.len() { + chars.next(); } - } else if prev_token == Some(&Token::Period) { - // If the previous token was a period, thus not belonging to a number, - // the value we have is part of an identifier. - return Ok(Some(Token::make_word(s, None))); + exponent_part += &peeking_take_while(chars, |ch| ch.is_ascii_digit()); + s += exponent_part.as_str(); } + // Not an exponent, discard the work done + _ => (), } - - let long = if chars.peek() == Some(&'L') { - chars.next(); - true - } else { - false - }; - Ok(Some(Token::Number(s, long))) } - // punctuation - '(' => self.consume_and_return(chars, Token::LParen), - ')' => self.consume_and_return(chars, Token::RParen), - ',' => self.consume_and_return(chars, Token::Comma), - // operators - '-' => { - chars.next(); // consume the '-' - - match chars.peek() { - Some('-') => { - let mut is_comment = true; - if self.dialect.requires_single_line_comment_whitespace() { - is_comment = Some(' ') == chars.peekable.clone().nth(1); - } - - if is_comment { - self.handle_colon_space_error(chars, prev_token)?; - chars.next(); // consume second '-' - // Consume the rest of the line as comment - let _comment = self.tokenize_single_line_comment(chars); - *location = chars.location(); - return self.next_token(location, chars, prev_token, true); - } - self.start_binop(chars, "-", Token::Minus) - } - Some('>') => { - chars.next(); - match chars.peek() { - Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow), - _ => self.start_binop(chars, "->", Token::Arrow), - } + // If the dialect supports identifiers that start with a numeric prefix, + // we need to check if the value is in fact an identifier and must thus + // be tokenized as a word. + if self.dialect.supports_numeric_prefix() { + if exponent_part.is_empty() { + // If it is not a number with an exponent, it may be + // an identifier starting with digits. 
+ let word = + peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch)); + + if !word.is_empty() { + s += word.as_str(); + return Ok(Some(Token::make_word(s, None))); } - // a regular '-' operator - _ => self.start_binop(chars, "-", Token::Minus), + } else if prev_token == Some(&Token::Period) { + // If the previous token was a period, thus not belonging to a number, + // the value we have is part of an identifier. + return Ok(Some(Token::make_word(s, None))); } } - '/' => { - chars.next(); // consume the '/' - match chars.peek() { - Some('*') => { - self.handle_colon_space_error(chars, prev_token)?; - chars.next(); // consume the '*', starting a multi-line comment - let _comment = self.consume_multiline_comment(chars)?; - *location = chars.location(); - self.next_token(location, chars, prev_token, true) + + let long = if chars.peek() == Some(&'L') { + chars.next(); + true + } else { + false + }; + Ok(Some(Token::Number(s, long))) + } + // punctuation + '(' => self.consume_and_return(chars, Token::LParen), + ')' => self.consume_and_return(chars, Token::RParen), + ',' => self.consume_and_return(chars, Token::Comma), + // operators + '-' => { + chars.next(); // consume the '-' + + match chars.peek() { + Some('-') => { + let mut is_comment = true; + if self.dialect.requires_single_line_comment_whitespace() { + is_comment = Some(' ') == chars.peekable.clone().nth(1); } - Some('/') if dialect_of!(self is SnowflakeDialect) => { - self.handle_colon_space_error(chars, prev_token)?; - chars.next(); // consume the second '/', starting a snowflake single-line comment + + if is_comment { + self.handle_colon_space_error(chars, prev_token, true)?; + chars.next(); // consume second '-' // Consume the rest of the line as comment let _comment = self.tokenize_single_line_comment(chars); *location = chars.location(); - self.next_token(location, chars, prev_token, true) + return self.next_token( + location, + chars, + prev_token, + prev_keyword, + true, + ); } - Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => { - self.consume_and_return(chars, Token::DuckIntDiv) + + self.start_binop(chars, "-", Token::Minus) + } + Some('>') => { + chars.next(); + match chars.peek() { + Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow), + _ => self.start_binop(chars, "->", Token::Arrow), } - // a regular '/' operator - _ => Ok(Some(Token::Div)), } + // a regular '-' operator + _ => self.start_binop(chars, "-", Token::Minus), } - '+' => self.consume_and_return(chars, Token::Plus), - '*' => self.consume_and_return(chars, Token::Mul), - '%' => { - chars.next(); // advance past '%' - match chars.peek() { - Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)), - Some(sch) if self.dialect.is_identifier_start('%') => { - self.tokenize_identifier_or_keyword([ch, *sch], chars) - } - _ => self.start_binop(chars, "%", Token::Mod), + } + '/' => { + chars.next(); // consume the '/' + match chars.peek() { + Some('*') => { + self.handle_colon_space_error(chars, prev_token, true)?; + chars.next(); // consume the '*', starting a multi-line comment + let _comment = self.consume_multiline_comment(chars)?; + *location = chars.location(); + self.next_token(location, chars, prev_token, prev_keyword, true) } + Some('/') if dialect_of!(self is SnowflakeDialect) => { + self.handle_colon_space_error(chars, prev_token, true)?; + chars.next(); // consume the second '/', starting a snowflake single-line comment + // Consume the rest of the line as comment + let _comment = 
self.tokenize_single_line_comment(chars); + *location = chars.location(); + self.next_token(location, chars, prev_token, prev_keyword, true) + } + Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => { + self.consume_and_return(chars, Token::DuckIntDiv) + } + // a regular '/' operator + _ => Ok(Some(Token::Div)), } - '|' => { - chars.next(); // consume the '|' - match chars.peek() { - Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot), - Some('|') => { - chars.next(); // consume the second '|' - match chars.peek() { - Some('/') => { - self.consume_for_binop(chars, "||/", Token::PGCubeRoot) - } - _ => self.start_binop(chars, "||", Token::StringConcat), - } + } + '+' => self.consume_and_return(chars, Token::Plus), + '*' => self.consume_and_return(chars, Token::Mul), + '%' => { + chars.next(); // advance past '%' + match chars.peek() { + Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)), + Some(sch) if self.dialect.is_identifier_start('%') => { + self.tokenize_identifier_or_keyword([ch, *sch], chars, prev_keyword) + } + _ => self.start_binop(chars, "%", Token::Mod), + } + } + '|' => { + chars.next(); // consume the '|' + match chars.peek() { + Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot), + Some('|') => { + chars.next(); // consume the second '|' + match chars.peek() { + Some('/') => self.consume_for_binop(chars, "||/", Token::PGCubeRoot), + _ => self.start_binop(chars, "||", Token::StringConcat), } - Some('&') if self.dialect.supports_geometric_types() => { - chars.next(); // consume - match chars.peek() { - Some('>') => self.consume_for_binop( - chars, - "|&>", - Token::VerticalBarAmpersandRightAngleBracket, - ), - _ => self.start_binop_opt(chars, "|&", None), - } + } + Some('&') if self.dialect.supports_geometric_types() => { + chars.next(); // consume + match chars.peek() { + Some('>') => self.consume_for_binop( + chars, + "|&>", + Token::VerticalBarAmpersandRightAngleBracket, + ), + _ => self.start_binop_opt(chars, "|&", None), } - Some('>') if self.dialect.supports_geometric_types() => { - chars.next(); // consume - match chars.peek() { - Some('>') => self.consume_for_binop( - chars, - "|>>", - Token::VerticalBarShiftRight, - ), - _ => self.start_binop_opt(chars, "|>", None), + } + Some('>') if self.dialect.supports_geometric_types() => { + chars.next(); // consume + match chars.peek() { + Some('>') => { + self.consume_for_binop(chars, "|>>", Token::VerticalBarShiftRight) } + _ => self.start_binop_opt(chars, "|>", None), } - Some('>') if self.dialect.supports_pipe_operator() => { - self.consume_for_binop(chars, "|>", Token::VerticalBarRightAngleBracket) - } - // Bitshift '|' operator - _ => self.start_binop(chars, "|", Token::Pipe), } - } - '=' => { - chars.next(); // consume - match chars.peek() { - Some('>') => self.consume_and_return(chars, Token::RArrow), - Some('=') => self.consume_and_return(chars, Token::DoubleEq), - _ => Ok(Some(Token::Eq)), + Some('>') if self.dialect.supports_pipe_operator() => { + self.consume_for_binop(chars, "|>", Token::VerticalBarRightAngleBracket) } + // Bitshift '|' operator + _ => self.start_binop(chars, "|", Token::Pipe), } - '!' 
=> { - chars.next(); // consume - match chars.peek() { - Some('=') => self.consume_and_return(chars, Token::Neq), - Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark), - Some('~') => { - chars.next(); - match chars.peek() { - Some('*') => self - .consume_and_return(chars, Token::ExclamationMarkTildeAsterisk), - Some('~') => { - chars.next(); - match chars.peek() { - Some('*') => self.consume_and_return( - chars, - Token::ExclamationMarkDoubleTildeAsterisk, - ), - _ => Ok(Some(Token::ExclamationMarkDoubleTilde)), - } - } - _ => Ok(Some(Token::ExclamationMarkTilde)), - } - } - _ => Ok(Some(Token::ExclamationMark)), - } + } + '=' => { + chars.next(); // consume + match chars.peek() { + Some('>') => self.consume_and_return(chars, Token::RArrow), + Some('=') => self.consume_and_return(chars, Token::DoubleEq), + _ => Ok(Some(Token::Eq)), } - '<' => { - chars.next(); // consume - match chars.peek() { - Some('=') => { - chars.next(); - match chars.peek() { - Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship), - _ => self.start_binop(chars, "<=", Token::LtEq), - } - } - Some('|') if self.dialect.supports_geometric_types() => { - self.consume_for_binop(chars, "<<|", Token::ShiftLeftVerticalBar) - } - Some('>') => self.consume_for_binop(chars, "<>", Token::Neq), - Some('<') if self.dialect.supports_geometric_types() => { - chars.next(); // consume - match chars.peek() { - Some('|') => self.consume_for_binop( - chars, - "<<|", - Token::ShiftLeftVerticalBar, - ), - _ => self.start_binop(chars, "<<", Token::ShiftLeft), + } + '!' => { + chars.next(); // consume + match chars.peek() { + Some('=') => self.consume_and_return(chars, Token::Neq), + Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark), + Some('~') => { + chars.next(); + match chars.peek() { + Some('*') => { + self.consume_and_return(chars, Token::ExclamationMarkTildeAsterisk) } - } - Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft), - Some('-') if self.dialect.supports_geometric_types() => { - chars.next(); // consume - match chars.peek() { - Some('>') => { - self.consume_for_binop(chars, "<->", Token::TwoWayArrow) + Some('~') => { + chars.next(); + match chars.peek() { + Some('*') => self.consume_and_return( + chars, + Token::ExclamationMarkDoubleTildeAsterisk, + ), + _ => Ok(Some(Token::ExclamationMarkDoubleTilde)), } - _ => self.start_binop_opt(chars, "<-", None), } + _ => Ok(Some(Token::ExclamationMarkTilde)), } - Some('^') if self.dialect.supports_geometric_types() => { - self.consume_for_binop(chars, "<^", Token::LeftAngleBracketCaret) - } - Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt), - _ => self.start_binop(chars, "<", Token::Lt), } + _ => Ok(Some(Token::ExclamationMark)), } - '>' => { - chars.next(); // consume - match chars.peek() { - Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq), - Some('>') => self.consume_for_binop(chars, ">>", Token::ShiftRight), - Some('^') if self.dialect.supports_geometric_types() => { - self.consume_for_binop(chars, ">^", Token::RightAngleBracketCaret) + } + '<' => { + chars.next(); // consume + match chars.peek() { + Some('=') => { + chars.next(); + match chars.peek() { + Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship), + _ => self.start_binop(chars, "<=", Token::LtEq), } - _ => self.start_binop(chars, ">", Token::Gt), } - } - ':' => { - chars.next(); - match chars.peek() { - Some(':') => self.consume_and_return(chars, Token::DoubleColon), - Some('=') => 
self.consume_and_return(chars, Token::Assignment), - _ => Ok(Some(Token::Colon)), + Some('|') if self.dialect.supports_geometric_types() => { + self.consume_for_binop(chars, "<<|", Token::ShiftLeftVerticalBar) } - } - ';' => self.consume_and_return(chars, Token::SemiColon), - '\\' => self.consume_and_return(chars, Token::Backslash), - '[' => self.consume_and_return(chars, Token::LBracket), - ']' => self.consume_and_return(chars, Token::RBracket), - '&' => { - chars.next(); // consume the '&' - match chars.peek() { - Some('>') if self.dialect.supports_geometric_types() => { - chars.next(); - self.consume_and_return(chars, Token::AmpersandRightAngleBracket) - } - Some('<') if self.dialect.supports_geometric_types() => { - chars.next(); // consume - match chars.peek() { - Some('|') => self.consume_and_return( - chars, - Token::AmpersandLeftAngleBracketVerticalBar, - ), - _ => { - self.start_binop(chars, "&<", Token::AmpersandLeftAngleBracket) - } + Some('>') => self.consume_for_binop(chars, "<>", Token::Neq), + Some('<') if self.dialect.supports_geometric_types() => { + chars.next(); // consume + match chars.peek() { + Some('|') => { + self.consume_for_binop(chars, "<<|", Token::ShiftLeftVerticalBar) } + _ => self.start_binop(chars, "<<", Token::ShiftLeft), } - Some('&') => { - chars.next(); // consume the second '&' - self.start_binop(chars, "&&", Token::Overlap) + } + Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft), + Some('-') if self.dialect.supports_geometric_types() => { + chars.next(); // consume + match chars.peek() { + Some('>') => self.consume_for_binop(chars, "<->", Token::TwoWayArrow), + _ => self.start_binop_opt(chars, "<-", None), } - // Bitshift '&' operator - _ => self.start_binop(chars, "&", Token::Ampersand), } + Some('^') if self.dialect.supports_geometric_types() => { + self.consume_for_binop(chars, "<^", Token::LeftAngleBracketCaret) + } + Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt), + _ => self.start_binop(chars, "<", Token::Lt), } - '^' => { - chars.next(); // consume the '^' - match chars.peek() { - Some('@') => self.consume_and_return(chars, Token::CaretAt), - _ => Ok(Some(Token::Caret)), + } + '>' => { + chars.next(); // consume + match chars.peek() { + Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq), + Some('>') => self.consume_for_binop(chars, ">>", Token::ShiftRight), + Some('^') if self.dialect.supports_geometric_types() => { + self.consume_for_binop(chars, ">^", Token::RightAngleBracketCaret) } + _ => self.start_binop(chars, ">", Token::Gt), } - '{' => self.consume_and_return(chars, Token::LBrace), - '}' => self.consume_and_return(chars, Token::RBrace), - '#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) => - { - self.handle_colon_space_error(chars, prev_token)?; - chars.next(); // consume the '#', starting a snowflake single-line comment - // Consume the rest of the line as comment - let _comment = self.tokenize_single_line_comment(chars); - *location = chars.location(); - self.next_token(location, chars, prev_token, true) + } + ':' => { + chars.next(); + match chars.peek() { + Some(':') => self.consume_and_return(chars, Token::DoubleColon), + Some('=') => self.consume_and_return(chars, Token::Assignment), + _ => Ok(Some(Token::Colon)), } - '~' => { - chars.next(); // consume - match chars.peek() { - Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk), - Some('=') if self.dialect.supports_geometric_types() => { - self.consume_for_binop(chars, "~=", 
Token::TildeEqual) - } - Some('~') => { - chars.next(); - match chars.peek() { - Some('*') => { - self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk) - } - _ => self.start_binop(chars, "~~", Token::DoubleTilde), - } + } + ';' => self.consume_and_return(chars, Token::SemiColon), + '\\' => self.consume_and_return(chars, Token::Backslash), + '[' => self.consume_and_return(chars, Token::LBracket), + ']' => self.consume_and_return(chars, Token::RBracket), + '&' => { + chars.next(); // consume the '&' + match chars.peek() { + Some('>') if self.dialect.supports_geometric_types() => { + chars.next(); + self.consume_and_return(chars, Token::AmpersandRightAngleBracket) + } + Some('<') if self.dialect.supports_geometric_types() => { + chars.next(); // consume + match chars.peek() { + Some('|') => self.consume_and_return( + chars, + Token::AmpersandLeftAngleBracketVerticalBar, + ), + _ => self.start_binop(chars, "&<", Token::AmpersandLeftAngleBracket), } - _ => self.start_binop(chars, "~", Token::Tilde), } + Some('&') => { + chars.next(); // consume the second '&' + self.start_binop(chars, "&&", Token::Overlap) + } + // Bitshift '&' operator + _ => self.start_binop(chars, "&", Token::Ampersand), } - '#' => { - chars.next(); - match chars.peek() { - Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus), - Some('>') => { - chars.next(); - match chars.peek() { - Some('>') => { - self.consume_for_binop(chars, "#>>", Token::HashLongArrow) - } - _ => self.start_binop(chars, "#>", Token::HashArrow), + } + '^' => { + chars.next(); // consume the '^' + match chars.peek() { + Some('@') => self.consume_and_return(chars, Token::CaretAt), + _ => Ok(Some(Token::Caret)), + } + } + '{' => self.consume_and_return(chars, Token::LBrace), + '}' => self.consume_and_return(chars, Token::RBrace), + '#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) => + { + self.handle_colon_space_error(chars, prev_token, true)?; + chars.next(); // consume the '#', starting a snowflake single-line comment + // Consume the rest of the line as comment + let _comment = self.tokenize_single_line_comment(chars); + *location = chars.location(); + self.next_token(location, chars, prev_token, prev_keyword, true) + } + '~' => { + chars.next(); // consume + match chars.peek() { + Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk), + Some('=') if self.dialect.supports_geometric_types() => { + self.consume_for_binop(chars, "~=", Token::TildeEqual) + } + Some('~') => { + chars.next(); + match chars.peek() { + Some('*') => { + self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk) } + _ => self.start_binop(chars, "~~", Token::DoubleTilde), } - Some(' ') => Ok(Some(Token::Sharp)), - Some('#') if self.dialect.supports_geometric_types() => { - self.consume_for_binop(chars, "##", Token::DoubleSharp) - } - Some(sch) if self.dialect.is_identifier_start('#') => { - self.tokenize_identifier_or_keyword([ch, *sch], chars) - } - _ => self.start_binop(chars, "#", Token::Sharp), } + _ => self.start_binop(chars, "~", Token::Tilde), } - '@' => { - chars.next(); - match chars.peek() { - Some('@') if self.dialect.supports_geometric_types() => { - self.consume_and_return(chars, Token::AtAt) - } - Some('-') if self.dialect.supports_geometric_types() => { - chars.next(); - match chars.peek() { - Some('@') => self.consume_and_return(chars, Token::AtDashAt), - _ => self.start_binop_opt(chars, "@-", None), - } - } - Some('>') => self.consume_and_return(chars, Token::AtArrow), - 
Some('?') => self.consume_and_return(chars, Token::AtQuestion), - Some('@') => { - chars.next(); - match chars.peek() { - Some(' ') => Ok(Some(Token::AtAt)), - Some(tch) if self.dialect.is_identifier_start('@') => { - self.tokenize_identifier_or_keyword([ch, '@', *tch], chars) - } - _ => Ok(Some(Token::AtAt)), - } - } - Some(' ') => Ok(Some(Token::AtSign)), - // We break on quotes here, because no dialect allows identifiers starting - // with @ and containing quotation marks (e.g. `@'foo'`) unless they are - // quoted, which is tokenized as a quoted string, not here (e.g. - // `"@'foo'"`). Further, at least two dialects parse `@` followed by a - // quoted string as two separate tokens, which this allows. For example, - // Postgres parses `@'1'` as the absolute value of '1' which is implicitly - // cast to a numeric type. And when parsing MySQL-style grantees (e.g. - // `GRANT ALL ON *.* to 'root'@'localhost'`), we also want separate tokens - // for the user, the `@`, and the host. - Some('\'') => Ok(Some(Token::AtSign)), - Some('\"') => Ok(Some(Token::AtSign)), - Some('`') => Ok(Some(Token::AtSign)), - Some(sch) if self.dialect.is_identifier_start('@') => { - self.tokenize_identifier_or_keyword([ch, *sch], chars) + } + '#' => { + chars.next(); + match chars.peek() { + Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus), + Some('>') => { + chars.next(); + match chars.peek() { + Some('>') => self.consume_for_binop(chars, "#>>", Token::HashLongArrow), + _ => self.start_binop(chars, "#>", Token::HashArrow), } - _ => Ok(Some(Token::AtSign)), } + Some(' ') => Ok(Some(Token::Sharp)), + Some('#') if self.dialect.supports_geometric_types() => { + self.consume_for_binop(chars, "##", Token::DoubleSharp) + } + Some(sch) if self.dialect.is_identifier_start('#') => { + self.tokenize_identifier_or_keyword([ch, *sch], chars, prev_keyword) + } + _ => self.start_binop(chars, "#", Token::Sharp), } - // Postgres uses ? for jsonb operators, not prepared statements - '?' if self.dialect.supports_geometric_types() => { - chars.next(); // consume - match chars.peek() { - Some('|') => { - chars.next(); - match chars.peek() { - Some('|') => self.consume_and_return( + } + '@' => { + chars.next(); + match chars.peek() { + Some('@') if self.dialect.supports_geometric_types() => { + self.consume_and_return(chars, Token::AtAt) + } + Some('-') if self.dialect.supports_geometric_types() => { + chars.next(); + match chars.peek() { + Some('@') => self.consume_and_return(chars, Token::AtDashAt), + _ => self.start_binop_opt(chars, "@-", None), + } + } + Some('>') => self.consume_and_return(chars, Token::AtArrow), + Some('?') => self.consume_and_return(chars, Token::AtQuestion), + Some('@') => { + chars.next(); + match chars.peek() { + Some(' ') => Ok(Some(Token::AtAt)), + Some(tch) if self.dialect.is_identifier_start('@') => self + .tokenize_identifier_or_keyword( + [ch, '@', *tch], chars, - Token::QuestionMarkDoubleVerticalBar, + prev_keyword, ), - _ => Ok(Some(Token::QuestionPipe)), + _ => Ok(Some(Token::AtAt)), + } + } + Some(' ') => Ok(Some(Token::AtSign)), + // We break on quotes here, because no dialect allows identifiers starting + // with @ and containing quotation marks (e.g. `@'foo'`) unless they are + // quoted, which is tokenized as a quoted string, not here (e.g. + // `"@'foo'"`). Further, at least two dialects parse `@` followed by a + // quoted string as two separate tokens, which this allows. 
For example, + // Postgres parses `@'1'` as the absolute value of '1' which is implicitly + // cast to a numeric type. And when parsing MySQL-style grantees (e.g. + // `GRANT ALL ON *.* to 'root'@'localhost'`), we also want separate tokens + // for the user, the `@`, and the host. + Some('\'') => Ok(Some(Token::AtSign)), + Some('\"') => Ok(Some(Token::AtSign)), + Some('`') => Ok(Some(Token::AtSign)), + Some(sch) if self.dialect.is_identifier_start('@') => { + self.tokenize_identifier_or_keyword([ch, *sch], chars, prev_keyword) + } + _ => Ok(Some(Token::AtSign)), + } + } + // Postgres uses ? for jsonb operators, not prepared statements + '?' if self.dialect.supports_geometric_types() => { + chars.next(); // consume + match chars.peek() { + Some('|') => { + chars.next(); + match chars.peek() { + Some('|') => { + self.consume_and_return(chars, Token::QuestionMarkDoubleVerticalBar) } + _ => Ok(Some(Token::QuestionPipe)), } + } - Some('&') => self.consume_and_return(chars, Token::QuestionAnd), - Some('-') => { - chars.next(); // consume - match chars.peek() { - Some('|') => self - .consume_and_return(chars, Token::QuestionMarkDashVerticalBar), - _ => Ok(Some(Token::QuestionMarkDash)), + Some('&') => self.consume_and_return(chars, Token::QuestionAnd), + Some('-') => { + chars.next(); // consume + match chars.peek() { + Some('|') => { + self.consume_and_return(chars, Token::QuestionMarkDashVerticalBar) } + _ => Ok(Some(Token::QuestionMarkDash)), } - Some('#') => self.consume_and_return(chars, Token::QuestionMarkSharp), - _ => self.consume_and_return(chars, Token::Question), } + Some('#') => self.consume_and_return(chars, Token::QuestionMarkSharp), + _ => self.consume_and_return(chars, Token::Question), } - '?' => { - chars.next(); - let s = peeking_take_while(chars, |ch| ch.is_numeric()); - Ok(Some(Token::Placeholder(String::from("?") + &s))) - } - - // identifier or keyword - ch if self.dialect.is_identifier_start(ch) => { - self.tokenize_identifier_or_keyword([ch], chars) - } - '$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)), + } + '?' => { + chars.next(); + let s = peeking_take_while(chars, |ch| ch.is_numeric()); + Ok(Some(Token::Placeholder(String::from("?") + &s))) + } - // whitespace check (including unicode chars) should be last as it covers some of the chars above - ch if ch.is_whitespace() => { - self.handle_colon_space_error(chars, prev_token)?; - chars.next(); // consume - *location = chars.location(); - self.next_token(location, chars, prev_token, true) - } - other => self.consume_and_return(chars, Token::Char(other)), - }, - None => Ok(None), + // identifier or keyword + ch if self.dialect.is_identifier_start(ch) => { + self.tokenize_identifier_or_keyword([ch], chars, prev_keyword) + } + '$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)), + + // whitespace check (including unicode chars) should be last as it covers some of the chars above + ch if ch.is_whitespace() => { + self.handle_colon_space_error(chars, prev_token, preceded_by_whitespace)?; + chars.next(); // consume + *location = chars.location(); + self.next_token(location, chars, prev_token, prev_keyword, true) + } + other => self.consume_and_return(chars, Token::Char(other)), } } @@ -1951,12 +1959,47 @@ impl<'a> Tokenizer<'a> { } /// Tokenize an identifier or keyword, after the first char is already consumed. 
-    fn tokenize_word(&self, first_chars: impl Into<String>, chars: &mut State) -> String {
+    fn tokenize_word(
+        &self,
+        first_chars: impl Into<String>,
+        chars: &mut State,
+        prev_keyword: Option<Keyword>,
+    ) -> Result<String, TokenizerError> {
         let mut s = first_chars.into();
         s.push_str(&peeking_take_while(chars, |ch| {
-            self.dialect.is_identifier_part(ch) || ch == '-' && self.dialect.is_identifier_part('-')
+            self.dialect.is_identifier_part(ch)
         }));
-        s
+        if !matches!(prev_keyword, Some(Keyword::SELECT))
+            && self.dialect.supports_hyphenated_identifiers()
+        {
+            while chars.peek() == Some(&'-') {
+                chars.next(); // consume the '-'
+                let mut alphabetic_characters = false;
+                let mut new_identifier = String::new();
+                new_identifier.push_str(&peeking_take_while(chars, |ch| {
+                    alphabetic_characters |= ch.is_alphabetic();
+                    self.dialect.is_identifier_part(ch)
+                }));
+
+                if let Some(ch) = new_identifier.chars().next() {
+                    if ch.is_numeric() && alphabetic_characters {
+                        return self.tokenizer_error(
+                            chars.location(),
+                            "Identifier cannot start with a digit and contain alphabetic characters after hyphen",
+                        );
+                    }
+                } else {
+                    // No characters after the hyphen, meaning it's not a valid identifier.
+                    return self
+                        .tokenizer_error(chars.location(), "Identifier cannot end with a hyphen");
+                }
+
+                s.push('-');
+                s.push_str(&new_identifier);
+            }
+        }
+
+        Ok(s)
     }
 
     /// Read a quoted identifier
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 99b7ac3fa..8aa99dcae 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -3589,6 +3589,7 @@ fn test_double_value() {
 
     for (input, expected) in test_cases {
         for (i, expr) in input.iter().enumerate() {
+            println!("Testing expression: {}", expr);
             if let Statement::Query(query) =
                 dialects.one_statement_parses_to(&format!("SELECT {expr}"), "")
             {
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index bcc154287..bd337a96d 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -1014,27 +1014,37 @@ fn parse_drop_schema_if_exists() {
 
 #[test]
 fn parse_copy_from_stdin() {
-    let sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM stdin;
-1 PENELOPE GUINESS 2006-02-15 09:34:33 0.11111
-2 NICK WAHLBERG 2006-02-15 09:34:33 0.22222
-3 ED CHASE 2006-02-15 09:34:33 0.312323
-4 JENNIFER DAVIS 2006-02-15 09:34:33 0.3232
-5 JOHNNY LOLLOBRIGIDA 2006-02-15 09:34:33 1.343
-6 BETTE NICHOLSON 2006-02-15 09:34:33 5.0
-7 GRACE MOSTEL 2006-02-15 09:34:33 6.0
-8 MATTHEW JOHANSSON 2006-02-15 09:34:33 7.0
-9 JOE SWANK 2006-02-15 09:34:33 8.0
-10 CHRISTIAN GABLE 2006-02-15 09:34:33 9.1
-11 ZERO CAGE 2006-02-15 09:34:33 10.001
-12 KARL BERRY 2017-11-02 19:15:42.308637+08 11.001
-A Fateful Reflection of a Waitress And a Boat who must Discover a Sumo Wrestler in Ancient China
-Kwara & Kogi
-{"Deleted Scenes","Behind the Scenes"}
-'awe':5 'awe-inspir':4 'barbarella':1 'cat':13 'conquer':16 'dog':18 'feminist':10 'inspir':6 'monasteri':21 'must':15 'stori':7 'streetcar':2
-PHP ₱ USD $
-\N Some other value
-\\."#;
-    pg_and_generic().one_statement_parses_to(sql, "");
+    let sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN;
+1 PENELOPE GUINESS 2006-02-15 09:34:33 0.11111
+2 NICK WAHLBERG 2006-02-15 09:34:33 0.22222
+3 ED CHASE 2006-02-15 09:34:33 0.312323
+4 JENNIFER DAVIS 2006-02-15 09:34:33 0.3232
+5 JOHNNY LOLLOBRIGIDA 2006-02-15 09:34:33 1.343
+6 BETTE NICHOLSON 2006-02-15 09:34:33 5.0
+7 GRACE MOSTEL 2006-02-15 09:34:33 6.0
+8 MATTHEW JOHANSSON 2006-02-15 09:34:33 7.0
+9 JOE SWANK 2006-02-15 09:34:33 8.0
+10 CHRISTIAN GABLE 2006-02-15 09:34:33 9.1
+11 ZERO CAGE 2006-02-15 09:34:33 10.001
+12 KARL BERRY 2017-11-02 19:15:42.308637+08 11.001
+\."#;
+    pg_and_generic().verified_stmt(sql);
+
+    let sql_comma_separated = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN (FORMAT csv, DELIMITER ',');
+1,PENELOPE,GUINESS,2006-02-15 09:34:33,0.11111
+2,NICK,WAHLBERG,2006-02-15 09:34:33,0.22222
+3,ED,CHASE,2006-02-15 09:34:33,0.312323
+4,JENNIFER,DAVIS,2006-02-15 09:34:33,0.3232
+5,JOHNNY,"LOLLO,BRIGIDA",2006-02-15 09:34:33,1.343
+6,BETTE,NICHOLSON,2006-02-15 09:34:33,5.0
+7,GRACE,MOSTEL,2006-02-15 09:34:33,6.0
+8,MATTHEW,JOHANSSON,2006-02-15 09:34:33,7.0
+9,JOE,SWANK,2006-02-15 09:34:33,8.0
+10,CHRISTIAN,GABLE,2006-02-15 09:34:33,9.1
+11,ZERO,CAGE,2006-02-15 09:34:33,10.001
+12,KARL,BERRY,2017-11-02 19:15:42.308637+08,11.001
+\."#;
+    pg_and_generic().verified_stmt(sql_comma_separated);
 }
 
 #[test]
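A standalone sketch of the hyphen rule the reworked `tokenize_word` above enforces (illustrative only — the function name and structure here are mine, not the crate's): every hyphen-separated continuation must be non-empty, and a continuation that starts with a digit must not also contain letters, so `foo-123` lexes as one identifier while `foo-123a` and `foo-` are rejected.

    // Illustrative stand-in for the tokenizer's hyphen-continuation checks.
    fn validate_hyphen_continuations(ident: &str) -> Result<(), String> {
        let mut segments = ident.split('-');
        segments.next(); // the first segment is an ordinary identifier start
        for segment in segments {
            if segment.is_empty() {
                return Err("Identifier cannot end with a hyphen".into());
            }
            let starts_with_digit = segment.starts_with(|c: char| c.is_numeric());
            let contains_alphabetic = segment.chars().any(|c| c.is_alphabetic());
            if starts_with_digit && contains_alphabetic {
                return Err(
                    "Identifier cannot start with a digit and contain alphabetic characters after hyphen"
                        .into(),
                );
            }
        }
        Ok(())
    }

    fn main() {
        assert!(validate_hyphen_continuations("foo-123").is_ok());
        assert!(validate_hyphen_continuations("foo-bar-7").is_ok());
        assert!(validate_hyphen_continuations("foo-123a").is_err()); // digit + letters
        assert!(validate_hyphen_continuations("foo-").is_err()); // trailing hyphen
    }
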
From 819c0958d6f59120794fb9f15d6a910c659b53e2 Mon Sep 17 00:00:00 2001
From: Luca
Date: Wed, 29 Oct 2025 10:16:19 +0100
Subject: [PATCH 06/10] Tentatively added support for path identifiers

---
 src/dialect/mod.rs       |  5 +++++
 src/dialect/snowflake.rs | 22 ++++++++++++++++++++--
 src/tokenizer.rs         | 24 ++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index abc8291d7..df19a598a 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -183,6 +183,11 @@ pub trait Dialect: Debug + Any {
         false
     }
 
+    /// Returns whether the dialect supports path-like identifiers
+    fn supports_path_like_identifiers(&self) -> bool {
+        false
+    }
+
     /// Most dialects do not have custom operators. Override this method to provide custom operators.
     fn is_custom_operator_part(&self, _ch: char) -> bool {
         false
diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index 6b40125e3..ba370b34c 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -158,6 +158,10 @@ impl Dialect for SnowflakeDialect {
             || ch == '_'
     }
 
+    fn supports_path_like_identifiers(&self) -> bool {
+        true
+    }
+
     // See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences
     fn supports_string_literal_backslash_escape(&self) -> bool {
         true
@@ -1067,8 +1071,22 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> {
             Token::Plus => ident.push('+'),
             Token::Minus => ident.push('-'),
             Token::Number(n, _) => ident.push_str(n),
-            Token::Word(w) => ident.push_str(&w.to_string()),
-            _ => return parser.expected("stage name identifier", parser.peek_token()),
+            Token::Word(w) => {
+                if matches!(w.keyword, Keyword::NoKeyword) {
+                    ident.push_str(w.to_string().as_str());
+                } else {
+                    parser.prev_token();
+                    break;
+                }
+            }
+            token => {
+                return {
+                    println!(
+                        "Unexpected token {token:?} while parsing stage name identifier {ident:?}"
+                    );
+                    parser.expected("stage name identifier", parser.peek_token())
+                }
+            }
         }
     }
     Ok(Ident::new(ident))
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index f49468fe3..9b1094f8b 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1669,6 +1669,11 @@ impl<'a> Tokenizer<'a> {
                             _ => self.start_binop(chars, "~~", Token::DoubleTilde),
                         }
                     }
+                    Some('/') if self.dialect.supports_path_like_identifiers() => {
+                        // path-like identifier starting with `~`, e.g. `~/staged/file.csv`
+                        let s = self.tokenize_word("~", chars, prev_keyword)?;
+                        Ok(Some(Token::make_word(s, None)))
+                    }
                     _ => self.start_binop(chars, "~", Token::Tilde),
                 }
             }
@@ -1969,6 +1974,25 @@
         s.push_str(&peeking_take_while(chars, |ch| {
             self.dialect.is_identifier_part(ch)
         }));
+
+        while !matches!(prev_keyword, Some(Keyword::SELECT))
+            && self.dialect.supports_path_like_identifiers()
+            && chars.peek().map(|&ch| ch == '/').unwrap_or(false)
+            && chars
+                .peekable
+                .clone()
+                .nth(1)
+                .map(|ch| ch.is_alphabetic())
+                .unwrap_or(false)
+        {
+            s.push('/');
+            chars.next(); // consume the '/'
+
+            s.push_str(&peeking_take_while(chars, |ch| {
+                self.dialect.is_identifier_part(ch)
+            }));
+        }
+
         if !matches!(prev_keyword, Some(Keyword::SELECT))
             && self.dialect.supports_hyphenated_identifiers()
         {

From 7ea97462defecdf9b8100ffbb3e8943b00496904 Mon Sep 17 00:00:00 2001
From: Luca
Date: Wed, 29 Oct 2025 10:37:32 +0100
Subject: [PATCH 07/10] Tentatively fixed snowflake ident in @-prefixed paths

---
 src/dialect/snowflake.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index ba370b34c..4e91bf8d3 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -1072,7 +1072,7 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> {
             Token::Minus => ident.push('-'),
             Token::Number(n, _) => ident.push_str(n),
             Token::Word(w) => {
-                if matches!(w.keyword, Keyword::NoKeyword) {
+                if matches!(w.keyword, Keyword::NoKeyword) || ident.ends_with("@") {
                     ident.push_str(w.to_string().as_str());
                 } else {
                     parser.prev_token();

From c6c391c114987265d0f9cbeb8a3d9de12ea763b9 Mon Sep 17 00:00:00 2001
From: Luca
Date: Wed, 29 Oct 2025 10:38:42 +0100
Subject: [PATCH 08/10] Fixed broken doc test

---
 src/tokenizer.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9b1094f8b..3fa46a48c 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -851,7 +851,7 @@ impl<'a> Tokenizer<'a> {
     /// Create a new SQL tokenizer for the specified SQL statement
     ///
     /// ```
-    /// # use sqlparser::tokenizer::{Token, Whitespace, Tokenizer};
+    /// # use sqlparser::tokenizer::{Token, Tokenizer};
     /// # use sqlparser::dialect::GenericDialect;
     /// # let dialect = GenericDialect{};
     /// let query = r#"SELECT 'foo'"#;
@@ -861,7 +861,6 @@
     ///
     /// assert_eq!(tokens, vec![
     ///     Token::make_word("SELECT", None),
-    ///     Token::Whitespace(Whitespace::Space),
     ///     Token::SingleQuotedString("foo".to_string()),
     /// ]);
     pub fn new(dialect: &'a dyn Dialect, query: &'a str) -> Self {
@@ -1673,7 +1672,7 @@ impl<'a> Tokenizer<'a> {
                         // path-like identifier starting with `~`, e.g. `~/staged/file.csv`
                         let s = self.tokenize_word("~", chars, prev_keyword)?;
                         Ok(Some(Token::make_word(s, None)))
-                    }
+                    }
                     _ => self.start_binop(chars, "~", Token::Tilde),
                 }
             }
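Assuming the path support above works as intended, a Snowflake stage reference should now reach the parser as a handful of coarse tokens rather than a long run of operator punctuation. A hedged sketch against the public tokenizer API (the exact token stream produced on this branch is an assumption, hence a print instead of asserts):

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::tokenizer::Tokenizer;

    fn main() {
        // `@~` is the current user's stage; with path-like identifiers
        // enabled, the `~/staged/...` tail should tokenize as words rather
        // than as the `~` operator followed by division.
        let sql = "LIST @~/staged/data.csv";
        let tokens = Tokenizer::new(&SnowflakeDialect {}, sql).tokenize().unwrap();
        println!("{tokens:?}");
    }
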
From 5120d8c051122163b9a414d64b03a377a5f28076 Mon Sep 17 00:00:00 2001
From: Luca
Date: Wed, 29 Oct 2025 11:22:07 +0100
Subject: [PATCH 09/10] Fixed code smells

---
 src/ast/ddl.rs           |  2 +-
 src/ast/mod.rs           |  9 +---
 src/dialect/snowflake.rs |  9 +---
 src/parser/mod.rs        | 99 ++++------------------------------------
 src/test_utils.rs        |  1 -
 src/tokenizer.rs         | 13 +++---
 6 files changed, 20 insertions(+), 113 deletions(-)

diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs
index fd481213f..5b74e65be 100644
--- a/src/ast/ddl.rs
+++ b/src/ast/ddl.rs
@@ -19,7 +19,7 @@
 //! (commonly referred to as Data Definition Language, or DDL)
 
 #[cfg(not(feature = "std"))]
-use alloc::{boxed::Box, format, string::String, vec, vec::Vec};
+use alloc::{boxed::Box, format, string::String, vec::Vec};
 use core::fmt::{self, Display, Write};
 
 #[cfg(feature = "serde")]
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 6ddf32819..2e4898a38 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -2787,10 +2787,11 @@ impl fmt::Display for Declare {
 }
 
 /// Sql options of a `CREATE TABLE` statement.
-#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
 pub enum CreateTableOptions {
+    #[default]
     None,
     /// Options specified using the `WITH` keyword.
     /// e.g. `WITH (description = "123")`
@@ -2819,12 +2820,6 @@ pub enum CreateTableOptions {
     TableProperties(Vec<SqlOption>),
 }
 
-impl Default for CreateTableOptions {
-    fn default() -> Self {
-        Self::None
-    }
-}
-
 impl fmt::Display for CreateTableOptions {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self {
diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index 4e91bf8d3..91d041757 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -1079,14 +1079,7 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> {
                     break;
                 }
             }
-            token => {
-                return {
-                    println!(
-                        "Unexpected token {token:?} while parsing stage name identifier {ident:?}"
-                    );
-                    parser.expected("stage name identifier", parser.peek_token())
-                }
-            }
+            _ => return parser.expected("stage name identifier", parser.peek_token()),
         }
     }
     Ok(Ident::new(ident))
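The `CreateTableOptions` change above swaps a hand-written `Default` impl for the derived one with a `#[default]` variant marker (stable since Rust 1.62). The pattern in isolation, on a stand-in enum:

    // `#[derive(Default)]` on an enum needs exactly one variant tagged
    // `#[default]`; `Default::default()` then returns that variant.
    #[derive(Debug, Default, PartialEq)]
    enum Options {
        #[default]
        None,
        With(Vec<String>),
    }

    fn main() {
        assert_eq!(Options::default(), Options::None);
        assert_ne!(Options::With(vec!["k = v".into()]), Options::default());
    }
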
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 90c52bb87..6225681fb 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -4031,13 +4031,13 @@ impl<'a> Parser<'a> {
     /// See [`Self::peek_token`] for an example.
     pub fn peek_tokens_with_location<const N: usize>(&self) -> [TokenWithSpan; N] {
         let mut index = self.index;
-        core::array::from_fn(|_| loop {
+        core::array::from_fn(|_| {
             let token = self.tokens.get(index);
             index += 1;
-            break token.cloned().unwrap_or(TokenWithSpan {
+            token.cloned().unwrap_or(TokenWithSpan {
                 token: Token::EOF,
                 span: Span::empty(),
-            });
+            })
         })
     }
 
@@ -4047,10 +4047,10 @@
     /// See [`Self::peek_tokens`] for an example.
     pub fn peek_tokens_ref<const N: usize>(&self) -> [&TokenWithSpan; N] {
         let mut index = self.index;
-        core::array::from_fn(|_| loop {
+        core::array::from_fn(|_| {
             let token = self.tokens.get(index);
             index += 1;
-            break token.unwrap_or(&EOF_TOKEN);
+            token.unwrap_or(&EOF_TOKEN)
         })
     }
 
@@ -8546,7 +8546,7 @@ impl<'a> Parser<'a> {
                 return self.expected(
                     "FULLTEXT or SPATIAL option without constraint name",
                     TokenWithSpan {
-                        token: Token::make_keyword(&name.to_string()),
+                        token: Token::make_keyword(name.to_string()),
                         span: next_token.span,
                     },
                 );
@@ -11125,9 +11125,9 @@ impl<'a> Parser<'a> {
         let mut parts = vec![];
         if dialect_of!(self is BigQueryDialect) && in_table_clause {
             loop {
-                let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
+                let ident = self.parse_identifier()?;
                 parts.push(ObjectNamePart::Identifier(ident));
-                if !self.consume_token(&Token::Period) && !end_with_period {
+                if !self.consume_token(&Token::Period) {
                     break;
                 }
             }
@@ -11141,9 +11141,9 @@
                     span,
                 }));
             } else if dialect_of!(self is BigQueryDialect) && in_table_clause {
-                let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
+                let ident = self.parse_identifier()?;
                 parts.push(ObjectNamePart::Identifier(ident));
-                if !self.consume_token(&Token::Period) && !end_with_period {
+                if !self.consume_token(&Token::Period) {
                     break;
                 }
             } else if self.dialect.supports_object_name_double_dot_notation()
@@ -11322,85 +11322,6 @@ impl<'a> Parser<'a> {
         }
     }
 
-    /// On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or
-    /// TABLE clause.
-    ///
-    /// The first segment must be an ordinary unquoted identifier, e.g. it must not start
-    /// with a digit. Subsequent segments are either must either be valid identifiers or
-    /// integers, e.g. foo-123 is allowed, but foo-123a is not.
-    ///
-    /// [BigQuery-lexical](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical)
-    ///
-    /// Return a tuple of the identifier and a boolean indicating it ends with a period.
-    fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> {
-        match self.peek_token().token {
-            // Token::Word(w) => {
-            //     let quote_style_is_none = w.quote_style.is_none();
-            //     let mut requires_whitespace = false;
-            //     let mut ident = w.into_ident(self.next_token().span);
-            //     if quote_style_is_none {
-            //         while matches!(self.peek_token().token, Token::Minus) {
-            //             unreachable!("Something went wrong in the tokenizer!");
-            //             // self.next_token();
-            //             // ident.value.push('-');
-
-            //             // let token = self
-            //             //     .next_token_no_skip()
-            //             //     .cloned()
-            //             //     .unwrap_or(TokenWithSpan::wrap(Token::EOF));
-            //             // requires_whitespace = match token.token {
-            //             //     Token::Word(next_word) if next_word.quote_style.is_none() => {
-            //             //         ident.value.push_str(&next_word.value);
-            //             //         false
-            //             //     }
-            //             //     Token::Number(s, false) => {
-            //             //         // A number token can represent a decimal value ending with a period, e.g., `Number('123.')`.
-            //             //         // However, for an [ObjectName], it is part of a hyphenated identifier, e.g., `foo-123.bar`.
-            //             //         //
-            //             //         // If a number token is followed by a period, it is part of an [ObjectName].
-            //             //         // Return the identifier with `true` if the number token is followed by a period, indicating that
-            //             //         // parsing should continue for the next part of the hyphenated identifier.
-            //             //         if s.ends_with('.') {
-            //             //             let Some(s) = s.split('.').next().filter(|s| {
-            //             //                 !s.is_empty() && s.chars().all(|c| c.is_ascii_digit())
-            //             //             }) else {
-            //             //                 return self.expected(
-            //             //                     "continuation of hyphenated identifier",
-            //             //                     TokenWithSpan::new(Token::Number(s, false), token.span),
-            //             //                 );
-            //             //             };
-            //             //             ident.value.push_str(s);
-            //             //             return Ok((ident, true));
-            //             //         } else {
-            //             //             ident.value.push_str(&s);
-            //             //         }
-            //             //         // If next token is period, then it is part of an ObjectName and we don't expect whitespace
-            //             //         // after the number.
-            //             //         !matches!(self.peek_token().token, Token::Period)
-            //             //     }
-            //             //     _ => {
-            //             //         return self
-            //             //             .expected("continuation of hyphenated identifier", token);
-            //             //     }
-            //             // }
-            //         }
-
-            //         // If the last segment was a number, we must check that it's followed by whitespace,
-            //         // otherwise foo-123a will be parsed as `foo-123` with the alias `a`.
-            //         if requires_whitespace {
-            //             let token = self.next_token();
-            //             if !matches!(token.token, Token::EOF) {
-            //                 return self
-            //                     .expected("whitespace following hyphenated identifier", token);
-            //             }
-            //         }
-            //     }
-            //     Ok((ident, false))
-            // }
-            _ => Ok((self.parse_identifier()?, false)),
-        }
-    }
-
     /// Parses a parenthesized, comma-separated list of column definitions within a view.
     fn parse_view_columns(&mut self) -> Result<Vec<ViewColumnDef>, ParserError> {
         if self.consume_token(&Token::LParen) {
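With whitespace tokens gone from the buffer, the peek helpers simplified above are plain array windows over `self.tokens`. A usage sketch against the public API (the token shapes asserted here are assumptions about this branch's behavior):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;
    use sqlparser::tokenizer::Token;

    fn main() {
        let parser = Parser::new(&GenericDialect {})
            .try_with_sql("SELECT a FROM t")
            .unwrap();
        // Peek two tokens without consuming either; with no whitespace
        // tokens these are simply tokens[index] and tokens[index + 1].
        let [first, second] = parser.peek_tokens();
        assert!(matches!(first, Token::Word(_))); // SELECT
        assert!(matches!(second, Token::Word(_))); // a
    }
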
diff --git a/src/test_utils.rs b/src/test_utils.rs
index 978447d96..a8c8afd59 100644
--- a/src/test_utils.rs
+++ b/src/test_utils.rs
@@ -154,7 +154,6 @@ impl TestedDialects {
     ///
     /// For multiple statements, use [`statements_parse_to`].
     pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement {
-        println!("Testing SQL: {}", sql);
         let mut statements = self.parse_sql_statements(sql).expect(sql);
         assert_eq!(statements.len(), 1);
         if !canonical.is_empty() && sql != canonical {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 3fa46a48c..b5d7a67f2 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -23,7 +23,6 @@
 
 #[cfg(not(feature = "std"))]
 use alloc::{
-    borrow::ToOwned,
     format,
     string::{String, ToString},
     vec,
@@ -1319,12 +1318,12 @@ impl<'a> Tokenizer<'a> {
                 // If so, what follows is definitely not part of a decimal number and
                 // we should yield the dot as a dedicated token so compound identifiers
                 // starting with digits can be parsed correctly.
-                if s == "." && self.dialect.supports_numeric_prefix() {
-                    if !preceded_by_whitespace
-                        && !matches!(prev_token, Some(Token::Plus | Token::Minus))
-                    {
-                        return Ok(Some(Token::Period));
-                    }
+                if s == "."
+                    && self.dialect.supports_numeric_prefix()
+                    && !preceded_by_whitespace
+                    && !matches!(prev_token, Some(Token::Plus | Token::Minus))
+                {
+                    return Ok(Some(Token::Period));
                 }
 
                 // Consume fractional digits.
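The flattened condition above preserves the earlier behavior: in a `supports_numeric_prefix` dialect (e.g. MySQL), a dot that directly follows another token with no whitespace and no leading sign is emitted as `Token::Period`, which is what keeps digit-leading compound identifiers parseable. A hedged end-to-end sketch (whether this exact statement round-trips is an assumption):

    use sqlparser::dialect::MySqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // `1ea10.1a20` should come out as the compound identifier
        // `1ea10`.`1a20`, because the dot is lexed as a Period here;
        // `SELECT 1.5` still lexes 1.5 as a single number.
        let ast = Parser::parse_sql(&MySqlDialect {}, "SELECT 1ea10.1a20").unwrap();
        println!("{ast:?}");
    }
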
From 07a828f661df0384e42c7823417ddb59e0da12db Mon Sep 17 00:00:00 2001
From: Luca
Date: Wed, 29 Oct 2025 18:42:41 +0100
Subject: [PATCH 10/10] Replaced the csv crate with a custom CSV parser

---
 Cargo.toml                  |   1 -
 src/ast/mod.rs              |  68 ++++++++++------
 src/parser/mod.rs           | 149 +++++++++++++++++++++++++++---------
 tests/sqlparser_postgres.rs |  24 ++++++
 4 files changed, 182 insertions(+), 60 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 005cb4567..ed94bbbdd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -47,7 +47,6 @@ visitor = ["sqlparser_derive"]
 [dependencies]
 bigdecimal = { version = "0.4.1", features = ["serde"], optional = true }
 log = "0.4"
-csv = "1.4.0"
 recursive = { version = "0.1.1", optional = true}
 
 serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 2e4898a38..0b4402386 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -4576,19 +4576,21 @@ impl fmt::Display for Statement {
                 }
 
                 let mut null_symbol = "\\N";
-                let mut writer_builder = csv::WriterBuilder::new();
+                let mut delimiter = '\t';
+                let mut quote = '"';
+                let mut escape = '\\';
 
                 // Apply options
                 for option in options {
                     match option {
                         CopyOption::Delimiter(c) => {
-                            writer_builder.delimiter(*c as u8);
+                            delimiter = *c;
                         }
                         CopyOption::Quote(c) => {
-                            writer_builder.quote(*c as u8);
+                            quote = *c;
                         }
                         CopyOption::Escape(c) => {
-                            writer_builder.escape(*c as u8);
+                            escape = *c;
                         }
                         CopyOption::Null(null) => {
                             null_symbol = null;
@@ -4601,10 +4603,7 @@
                 for option in legacy_options {
                     match option {
                         CopyLegacyOption::Delimiter(c) => {
-                            writer_builder.delimiter(*c as u8);
-                        }
-                        CopyLegacyOption::Header => {
-                            writer_builder.has_headers(true);
+                            delimiter = *c;
                         }
                         CopyLegacyOption::Null(null) => {
                             null_symbol = null;
@@ -4612,14 +4611,11 @@
                         CopyLegacyOption::Csv(csv_options) => {
                             for csv_option in csv_options {
                                 match csv_option {
-                                    CopyLegacyCsvOption::Header => {
-                                        writer_builder.has_headers(true);
-                                    }
                                     CopyLegacyCsvOption::Quote(c) => {
-                                        writer_builder.quote(*c as u8);
+                                        quote = *c;
                                     }
                                     CopyLegacyCsvOption::Escape(c) => {
-                                        writer_builder.escape(*c as u8);
+                                        escape = *c;
                                     }
                                     _ => {}
                                 }
@@ -4631,19 +4627,43 @@
 
                 if !values.is_empty() {
                     writeln!(f, ";")?;
-                    let mut writer = writer_builder.from_writer(vec![]);
+
+                    // Simple CSV writer
                     for row in values {
-                        writer
-                            .write_record(
-                                row.iter()
-                                    .map(|column| column.as_deref().unwrap_or(null_symbol)),
-                            )
-                            .map_err(|_| fmt::Error)?
+                        for (idx, column) in row.iter().enumerate() {
+                            if idx > 0 {
+                                write!(f, "{}", delimiter)?;
+                            }
+
+                            let field_value = column.as_deref().unwrap_or(null_symbol);
+
+                            // Check if field needs quoting
+                            let needs_quoting = field_value.contains(delimiter)
+                                || field_value.contains(quote)
+                                || field_value.contains('\n')
+                                || field_value.contains('\r');
+
+                            if needs_quoting {
+                                write!(f, "{}", quote)?;
+                                for ch in field_value.chars() {
+                                    if ch == quote {
+                                        // Escape quote by doubling it
+                                        write!(f, "{}{}", quote, quote)?;
+                                    } else if ch == escape {
+                                        // Escape escape character
+                                        write!(f, "{}{}", escape, escape)?;
+                                    } else {
+                                        write!(f, "{}", ch)?;
+                                    }
+                                }
+                                write!(f, "{}", quote)?;
+                            } else {
+                                write!(f, "{}", field_value)?;
+                            }
+                        }
+                        writeln!(f)?;
                    }
-                    writer.flush().map_err(|_| fmt::Error)?;
-                    let data = String::from_utf8(writer.into_inner().map_err(|_| fmt::Error)?)
-                        .map_err(|_| fmt::Error)?;
-                    write!(f, "{}", data)?;
+
+                    write!(f, "\\.")?;
                 }
 
                 Ok(())
             }
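Before the parser-side counterpart below, the writer's quoting rule distilled into a standalone helper (illustrative only; the real code above writes through a `fmt::Formatter`, and this sketch folds quote- and escape-doubling into one branch):

    // Quote a field when it contains the delimiter, the quote char, or a
    // newline; double any embedded quote/escape characters, as above.
    fn write_field(out: &mut String, field: &str, delimiter: char, quote: char, escape: char) {
        let needs_quoting = field.contains(delimiter)
            || field.contains(quote)
            || field.contains('\n')
            || field.contains('\r');
        if !needs_quoting {
            out.push_str(field);
            return;
        }
        out.push(quote);
        for ch in field.chars() {
            if ch == quote || ch == escape {
                out.push(ch); // write it twice
            }
            out.push(ch);
        }
        out.push(quote);
    }

    fn main() {
        let mut line = String::new();
        write_field(&mut line, "LOLLO,BRIGIDA", ',', '"', '\\');
        assert_eq!(line, "\"LOLLO,BRIGIDA\"");
    }
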
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 6225681fb..9feb2a776 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -9542,25 +9542,22 @@ impl<'a> Parser<'a> {
             return self.expected("COPY ... FROM STDIN with CSV body", self.peek_token());
         };
 
-        let mut reader_builder = csv::ReaderBuilder::new();
-        reader_builder.has_headers(false);
-
+        let mut delimiter = '\t';
+        let mut quote = '"';
+        let mut escape = '\\';
         let mut null_symbol = "\\N";
 
         // Apply options
         for option in options {
             match option {
                 CopyOption::Delimiter(c) => {
-                    reader_builder.delimiter(*c as u8);
-                }
-                CopyOption::Header(has_header) => {
-                    reader_builder.has_headers(*has_header);
+                    delimiter = *c;
                 }
                 CopyOption::Quote(c) => {
-                    reader_builder.quote(*c as u8);
+                    quote = *c;
                 }
                 CopyOption::Escape(c) => {
-                    reader_builder.escape(Some(*c as u8));
+                    escape = *c;
                 }
                 CopyOption::Null(null) => {
                     null_symbol = null;
@@ -9573,10 +9570,7 @@
         for option in legacy_options {
             match option {
                 CopyLegacyOption::Delimiter(c) => {
-                    reader_builder.delimiter(*c as u8);
-                }
-                CopyLegacyOption::Header => {
-                    reader_builder.has_headers(true);
+                    delimiter = *c;
                 }
                 CopyLegacyOption::Null(null) => {
                     null_symbol = null;
@@ -9584,14 +9578,11 @@
                 CopyLegacyOption::Csv(csv_options) => {
                     for csv_option in csv_options {
                         match csv_option {
-                            CopyLegacyCsvOption::Header => {
-                                reader_builder.has_headers(true);
-                            }
                             CopyLegacyCsvOption::Quote(c) => {
-                                reader_builder.quote(*c as u8);
+                                quote = *c;
                             }
                             CopyLegacyCsvOption::Escape(c) => {
-                                reader_builder.escape(Some(*c as u8));
+                                escape = *c;
                             }
                             _ => {}
                         }
@@ -9601,28 +9592,116 @@
             }
         }
 
+        // Simple CSV parser
         let mut result = vec![];
-        let mut reader = reader_builder.from_reader(body.as_bytes());
-        for record in reader.records() {
-            let record = match record {
-                Ok(rec) => rec,
-                Err(e) => {
-                    return Err(ParserError::ParserError(format!(
-                        "Error parsing CSV data: {}",
-                        e
-                    )))
-                }
-            };
-            let mut row = vec![];
-            for field in record.iter() {
-                if field == null_symbol {
-                    row.push(None);
+        let mut current_row = vec![];
+        let mut current_field = String::new();
+        let mut in_quotes = false;
+        let mut chars = body.chars().peekable();
+        let mut expected_column_count: Option<usize> = None;
+        let mut row_number = 0;
+
+        while let Some(ch) = chars.next() {
+            if in_quotes {
+                if ch == quote {
+                    // Check if it's an escaped quote
+                    if let Some(&next_ch) = chars.peek() {
+                        if next_ch == quote {
+                            // Escaped quote
+                            current_field.push(quote);
+                            chars.next();
+                        } else {
+                            // End of quoted field
+                            in_quotes = false;
+                        }
+                    } else {
+                        // End of quoted field at end of input
+                        in_quotes = false;
+                    }
+                } else if ch == escape {
+                    // Escape character
+                    if let Some(next_ch) = chars.next() {
+                        current_field.push(next_ch);
+                    }
+                } else {
+                    current_field.push(ch);
+                }
+            } else if ch == quote {
+                in_quotes = true;
+            } else if ch == delimiter {
+                // End of field
+                if current_field == null_symbol {
+                    current_row.push(None);
                 } else {
-                    row.push(Some(field.to_string()));
+                    current_row.push(Some(current_field.clone()));
+                }
+                current_field.clear();
+            } else if ch == '\n' || ch == '\r' {
+                // End of record
+                if ch == '\r' {
+                    // Skip \n if it follows \r
+                    if let Some(&'\n') = chars.peek() {
+                        chars.next();
+                    }
+                }
+                if !current_field.is_empty() || !current_row.is_empty() {
+                    if current_field == null_symbol {
+                        current_row.push(None);
+                    } else {
+                        current_row.push(Some(current_field.clone()));
+                    }
current_field.clear(); + + // Validate column count + row_number += 1; + if let Some(expected) = expected_column_count { + if current_row.len() != expected { + return Err(ParserError::ParserError(format!( + "CSV row {} has {} columns, but expected {} columns based on first row", + row_number, + current_row.len(), + expected + ))); + } + } else { + // First row establishes the expected column count + expected_column_count = Some(current_row.len()); + } + + result.push(current_row.clone()); + current_row.clear(); + } + } else { + current_field.push(ch); + } + } + + // Handle remaining field/row + if !current_field.is_empty() || !current_row.is_empty() { + if current_field == null_symbol { + current_row.push(None); + } else { + current_row.push(Some(current_field)); + } + + // Validate column count for last row + row_number += 1; + if let Some(expected) = expected_column_count { + if current_row.len() != expected { + return Err(ParserError::ParserError(format!( + "CSV row {} has {} columns, but expected {} columns based on first row", + row_number, + current_row.len(), + expected + ))); } } - result.push(row); + // Note: if this is the first and only row, we don't need to set expected_column_count + // since there's nothing to validate against + + result.push(current_row); } + Ok(result) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index bd337a96d..bfdbabb1c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1045,6 +1045,30 @@ fn parse_copy_from_stdin() { 12,KARL,BERRY,2017-11-02 19:15:42.308637+08,11.001 \."#; pg_and_generic().verified_stmt(sql_comma_separated); + + let incorrect_csv_sql = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN (FORMAT csv, DELIMITER ','); +1,PENELOPE,GUINESS,2006-02-15 09:34:33,0.11111 +2,NICK,WAHLBERG,2006-02-15 09:34:33 +\."#; + let parsed = pg_and_generic().parse_sql_statements(incorrect_csv_sql); + assert_eq!( + parsed.unwrap_err(), + ParserError::ParserError( + "CSV row 2 has 4 columns, but expected 5 columns based on first row".to_string() + ) + ); + + let mixed_incorrect_separators = r#"COPY public.actor (actor_id, first_name, last_name, last_update, value) FROM STDIN (FORMAT csv, DELIMITER ','); +1,PENELOPE,GUINESS,2006-02-15 09:34:33,0.11111 +2 NICK WAHLBERG 2006-02-15 09:34:33,0.22222 +\."#; + let parsed = pg_and_generic().parse_sql_statements(mixed_incorrect_separators); + assert_eq!( + parsed.unwrap_err(), + ParserError::ParserError( + "CSV row 2 has 2 columns, but expected 5 columns based on first row".to_string() + ) + ); } #[test]