@@ -29,22 +29,22 @@ use alloc::{
2929 vec,
3030 vec:: Vec ,
3131} ;
32- use core:: iter:: Peekable ;
3332use core:: num:: NonZeroU8 ;
3433use core:: str:: Chars ;
3534use core:: { cmp, fmt} ;
35+ use core:: { iter:: Peekable , str} ;
3636
3737#[ cfg( feature = "serde" ) ]
3838use serde:: { Deserialize , Serialize } ;
3939
4040#[ cfg( feature = "visitor" ) ]
4141use sqlparser_derive:: { Visit , VisitMut } ;
4242
43- use crate :: dialect:: Dialect ;
4443use crate :: dialect:: {
4544 BigQueryDialect , DuckDbDialect , GenericDialect , MySqlDialect , PostgreSqlDialect ,
4645 SnowflakeDialect ,
4746} ;
47+ use crate :: dialect:: { Dialect , OracleDialect } ;
4848use crate :: keywords:: { Keyword , ALL_KEYWORDS , ALL_KEYWORDS_INDEX } ;
4949use crate :: { ast:: DollarQuotedString , dialect:: HiveDialect } ;
5050
@@ -98,6 +98,12 @@ pub enum Token {
9898 TripleDoubleQuotedRawStringLiteral ( String ) ,
9999 /// "National" string literal: i.e: N'string'
100100 NationalStringLiteral ( String ) ,
101+ /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
102+ /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
103+ QuoteDelimitedStringLiteral ( char , String , char ) ,
104+ /// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
105+ /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
106+ NationalQuoteDelimitedStringLiteral ( char , String , char ) ,
101107 /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
102108 EscapedStringLiteral ( String ) ,
103109 /// Unicode string literal: i.e: U&'first \000A second'
@@ -292,6 +298,10 @@ impl fmt::Display for Token {
292298 Token :: TripleDoubleQuotedString ( ref s) => write ! ( f, "\" \" \" {s}\" \" \" " ) ,
293299 Token :: DollarQuotedString ( ref s) => write ! ( f, "{s}" ) ,
294300 Token :: NationalStringLiteral ( ref s) => write ! ( f, "N'{s}'" ) ,
301+ Token :: QuoteDelimitedStringLiteral ( q1, ref s, q2) => write ! ( f, "Q'{q1}{s}{q2}'" ) ,
302+ Token :: NationalQuoteDelimitedStringLiteral ( q1, ref s, q2) => {
303+ write ! ( f, "NQ'{q1}{s}{q2}'" )
304+ }
295305 Token :: EscapedStringLiteral ( ref s) => write ! ( f, "E'{s}'" ) ,
296306 Token :: UnicodeStringLiteral ( ref s) => write ! ( f, "U&'{s}'" ) ,
297307 Token :: HexStringLiteral ( ref s) => write ! ( f, "X'{s}'" ) ,
@@ -1032,13 +1042,32 @@ impl<'a> Tokenizer<'a> {
10321042 self . tokenize_single_quoted_string ( chars, '\'' , backslash_escape) ?;
10331043 Ok ( Some ( Token :: NationalStringLiteral ( s) ) )
10341044 }
1045+ Some ( & q @ 'q' ) | Some ( & q @ 'Q' ) if dialect_of ! ( self is OracleDialect | GenericDialect ) =>
1046+ {
1047+ chars. next ( ) ; // consume and check the next char
1048+ self . tokenize_word_or_quote_delimited_string (
1049+ chars,
1050+ & [ n, q] ,
1051+ Token :: NationalQuoteDelimitedStringLiteral ,
1052+ )
1053+ . map ( Some )
1054+ }
10351055 _ => {
10361056 // regular identifier starting with an "N"
10371057 let s = self . tokenize_word ( n, chars) ;
10381058 Ok ( Some ( Token :: make_word ( & s, None ) ) )
10391059 }
10401060 }
10411061 }
1062+ q @ 'Q' | q @ 'q' if dialect_of ! ( self is OracleDialect | GenericDialect ) => {
1063+ chars. next ( ) ; // consume and check the next char
1064+ self . tokenize_word_or_quote_delimited_string (
1065+ chars,
1066+ & [ q] ,
1067+ Token :: QuoteDelimitedStringLiteral ,
1068+ )
1069+ . map ( Some )
1070+ }
10421071 // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
10431072 x @ 'e' | x @ 'E' if self . dialect . supports_string_escape_constant ( ) => {
10441073 let starting_loc = chars. location ( ) ;
@@ -1994,6 +2023,70 @@ impl<'a> Tokenizer<'a> {
19942023 )
19952024 }
19962025
2026+ /// Reads a quote delimited string without "backslash escaping" or a word
2027+ /// depending on whether `chars.next()` delivers a `'`.
2028+ ///
2029+ /// See https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html
2030+ fn tokenize_word_or_quote_delimited_string (
2031+ & self ,
2032+ chars : & mut State ,
2033+ // the prefix that introduced the possible literal or word,
2034+ // e.g. "Q" or "nq"
2035+ word_prefix : & [ char ] ,
2036+ // turns an identified quote string literal,
2037+ // ie. `(start-quote-char, string-literal, end-quote-char)`
2038+ // into a token
2039+ as_literal : fn ( char , String , char ) -> Token ,
2040+ ) -> Result < Token , TokenizerError > {
2041+ match chars. peek ( ) {
2042+ Some ( '\'' ) => {
2043+ chars. next ( ) ;
2044+ // ~ determine the "quote character(s)"
2045+ let error_loc = chars. location ( ) ;
2046+ let ( start_quote_char, end_quote_char) = match chars. next ( ) {
2047+ // ~ "newline" is not allowed by Oracle's SQL Reference,
2048+ // but works with sql*plus nevertheless
2049+ None | Some ( ' ' ) | Some ( '\t' ) | Some ( '\r' ) | Some ( '\n' ) => {
2050+ return self . tokenizer_error (
2051+ error_loc,
2052+ format ! (
2053+ "Invalid space, tab, newline, or EOF after '{}''." ,
2054+ String :: from_iter( word_prefix)
2055+ ) ,
2056+ ) ;
2057+ }
2058+ Some ( c) => (
2059+ c,
2060+ match c {
2061+ '[' => ']' ,
2062+ '{' => '}' ,
2063+ '<' => '>' ,
2064+ '(' => ')' ,
2065+ c => c,
2066+ } ,
2067+ ) ,
2068+ } ;
2069+ // read the string literal until the "quote character" following a by literal quote
2070+ let mut s = String :: new ( ) ;
2071+ while let Some ( ch) = chars. next ( ) {
2072+ if ch == end_quote_char {
2073+ if let Some ( '\'' ) = chars. peek ( ) {
2074+ chars. next ( ) ; // ~ consume the quote
2075+ return Ok ( as_literal ( start_quote_char, s, end_quote_char) ) ;
2076+ }
2077+ }
2078+ s. push ( ch) ;
2079+ }
2080+ self . tokenizer_error ( error_loc, "Unterminated string literal" )
2081+ }
2082+ // ~ not a literal introduced with _token_prefix_, assm
2083+ _ => {
2084+ let s = self . tokenize_word ( String :: from_iter ( word_prefix) , chars) ;
2085+ Ok ( Token :: make_word ( & s, None ) )
2086+ }
2087+ }
2088+ }
2089+
19972090 /// Read a quoted string.
19982091 fn tokenize_quoted_string (
19992092 & self ,
0 commit comments