Skip to content

Commit 3af91ea

Browse files
committed
Supported quote delimited strings
1 parent 07b074b commit 3af91ea

File tree

4 files changed

+282
-4
lines changed

4 files changed

+282
-4
lines changed

src/ast/value.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,12 @@ pub enum Value {
167167
TripleDoubleQuotedRawStringLiteral(String),
168168
/// N'string value'
169169
NationalStringLiteral(String),
170+
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
171+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
172+
QuoteDelimitedStringLiteral(char, String, char),
173+
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
174+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
175+
NationalQuoteDelimitedStringLiteral(char, String, char),
170176
/// X'hex value'
171177
HexStringLiteral(String),
172178

@@ -205,6 +211,8 @@ impl Value {
205211
| Value::EscapedStringLiteral(s)
206212
| Value::UnicodeStringLiteral(s)
207213
| Value::NationalStringLiteral(s)
214+
| Value::QuoteDelimitedStringLiteral(_, s, _)
215+
| Value::NationalQuoteDelimitedStringLiteral(_, s, _)
208216
| Value::HexStringLiteral(s) => Some(s),
209217
Value::DollarQuotedString(s) => Some(s.value),
210218
_ => None,
@@ -242,6 +250,8 @@ impl fmt::Display for Value {
242250
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
243251
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
244252
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
253+
Value::QuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "Q'{q1}{s}{q2}'"),
254+
Value::NationalQuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "NQ'{q1}{s}{q2}'"),
245255
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
246256
Value::Boolean(v) => write!(f, "{v}"),
247257
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),

src/parser/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1704,6 +1704,8 @@ impl<'a> Parser<'a> {
17041704
| Token::TripleSingleQuotedRawStringLiteral(_)
17051705
| Token::TripleDoubleQuotedRawStringLiteral(_)
17061706
| Token::NationalStringLiteral(_)
1707+
| Token::QuoteDelimitedStringLiteral(_, _, _)
1708+
| Token::NationalQuoteDelimitedStringLiteral(_, _, _)
17071709
| Token::HexStringLiteral(_) => {
17081710
self.prev_token();
17091711
Ok(Expr::Value(self.parse_value()?))
@@ -2716,6 +2718,8 @@ impl<'a> Parser<'a> {
27162718
| Token::EscapedStringLiteral(_)
27172719
| Token::UnicodeStringLiteral(_)
27182720
| Token::NationalStringLiteral(_)
2721+
| Token::QuoteDelimitedStringLiteral(_, _, _)
2722+
| Token::NationalQuoteDelimitedStringLiteral(_, _, _)
27192723
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
27202724
_ => self.expected(
27212725
"either filler, WITH, or WITHOUT in LISTAGG",
@@ -10483,6 +10487,12 @@ impl<'a> Parser<'a> {
1048310487
Token::NationalStringLiteral(ref s) => {
1048410488
ok_value(Value::NationalStringLiteral(s.to_string()))
1048510489
}
10490+
Token::QuoteDelimitedStringLiteral(q1, s, q2) => {
10491+
ok_value(Value::QuoteDelimitedStringLiteral(q1, s, q2))
10492+
}
10493+
Token::NationalQuoteDelimitedStringLiteral(q1, s, q2) => {
10494+
ok_value(Value::NationalQuoteDelimitedStringLiteral(q1, s, q2))
10495+
}
1048610496
Token::EscapedStringLiteral(ref s) => {
1048710497
ok_value(Value::EscapedStringLiteral(s.to_string()))
1048810498
}

src/tokenizer.rs

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,22 +29,22 @@ use alloc::{
2929
vec,
3030
vec::Vec,
3131
};
32-
use core::iter::Peekable;
3332
use core::num::NonZeroU8;
3433
use core::str::Chars;
3534
use core::{cmp, fmt};
35+
use core::{iter::Peekable, str};
3636

3737
#[cfg(feature = "serde")]
3838
use serde::{Deserialize, Serialize};
3939

4040
#[cfg(feature = "visitor")]
4141
use sqlparser_derive::{Visit, VisitMut};
4242

43-
use crate::dialect::Dialect;
4443
use crate::dialect::{
4544
BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect,
4645
SnowflakeDialect,
4746
};
47+
use crate::dialect::{Dialect, OracleDialect};
4848
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
4949
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
5050

@@ -98,6 +98,12 @@ pub enum Token {
9898
TripleDoubleQuotedRawStringLiteral(String),
9999
/// "National" string literal: i.e: N'string'
100100
NationalStringLiteral(String),
101+
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
102+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
103+
QuoteDelimitedStringLiteral(char, String, char),
104+
/// "National" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
105+
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html)
106+
NationalQuoteDelimitedStringLiteral(char, String, char),
101107
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
102108
EscapedStringLiteral(String),
103109
/// Unicode string literal: i.e: U&'first \000A second'
@@ -292,6 +298,10 @@ impl fmt::Display for Token {
292298
Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
293299
Token::DollarQuotedString(ref s) => write!(f, "{s}"),
294300
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
301+
Token::QuoteDelimitedStringLiteral(q1, ref s, q2) => write!(f, "Q'{q1}{s}{q2}'"),
302+
Token::NationalQuoteDelimitedStringLiteral(q1, ref s, q2) => {
303+
write!(f, "NQ'{q1}{s}{q2}'")
304+
}
295305
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
296306
Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
297307
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
@@ -1032,13 +1042,32 @@ impl<'a> Tokenizer<'a> {
10321042
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
10331043
Ok(Some(Token::NationalStringLiteral(s)))
10341044
}
1045+
Some(&q @ 'q') | Some(&q @ 'Q') if dialect_of!(self is OracleDialect | GenericDialect) =>
1046+
{
1047+
chars.next(); // consume and check the next char
1048+
self.tokenize_word_or_quote_delimited_string(
1049+
chars,
1050+
&[n, q],
1051+
Token::NationalQuoteDelimitedStringLiteral,
1052+
)
1053+
.map(Some)
1054+
}
10351055
_ => {
10361056
// regular identifier starting with an "N"
10371057
let s = self.tokenize_word(n, chars);
10381058
Ok(Some(Token::make_word(&s, None)))
10391059
}
10401060
}
10411061
}
1062+
q @ 'Q' | q @ 'q' if dialect_of!(self is OracleDialect | GenericDialect) => {
1063+
chars.next(); // consume and check the next char
1064+
self.tokenize_word_or_quote_delimited_string(
1065+
chars,
1066+
&[q],
1067+
Token::QuoteDelimitedStringLiteral,
1068+
)
1069+
.map(Some)
1070+
}
10421071
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
10431072
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
10441073
let starting_loc = chars.location();
@@ -1994,6 +2023,70 @@ impl<'a> Tokenizer<'a> {
19942023
)
19952024
}
19962025

2026+
/// Reads either a quote delimited string literal or a plain word, depending
/// on whether the next character in `chars` is a `'`.
2027+
/// The literal's content is copied verbatim, without "backslash escaping".
/// The character right after the opening `'` is the start delimiter; `[`,
/// `{`, `<` and `(` are closed by `]`, `}`, `>` and `)` respectively, any
/// other character closes itself. The literal ends at the first closing
/// delimiter that is immediately followed by a `'`.
2028+
///
/// Returns a tokenizer error if the delimiter is missing (EOF) or is a
/// space, tab, carriage return or newline, or if the input ends before the
/// closing delimiter-and-quote pair is found.
///
2029+
/// See <https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html>
2030+
fn tokenize_word_or_quote_delimited_string(
2031+
&self,
2032+
chars: &mut State,
2033+
// the already consumed prefix that introduced the possible literal
2034+
// or word, e.g. "Q" or "nq"
2035+
word_prefix: &[char],
2036+
// turns an identified quote delimited string literal,
2037+
// i.e. `(start-quote-char, string-literal, end-quote-char)`,
2038+
// into a token
2039+
as_literal: fn(char, String, char) -> Token,
2040+
) -> Result<Token, TokenizerError> {
2041+
match chars.peek() {
2042+
Some('\'') => {
2043+
chars.next();
2044+
// ~ determine the "quote character(s)"
// the location is captured after consuming the opening `'` and is
// reported by both error paths below
2045+
let error_loc = chars.location();
2046+
let (start_quote_char, end_quote_char) = match chars.next() {
2047+
// ~ "newline" is not allowed by Oracle's SQL Reference,
2048+
// but works with sql*plus nevertheless
2049+
None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => {
2050+
return self.tokenizer_error(
2051+
error_loc,
2052+
format!(
2053+
"Invalid space, tab, newline, or EOF after '{}''.",
2054+
String::from_iter(word_prefix)
2055+
),
2056+
);
2057+
}
2058+
Some(c) => (
2059+
c,
2060+
match c {
2061+
'[' => ']',
2062+
'{' => '}',
2063+
'<' => '>',
2064+
'(' => ')',
2065+
c => c,
2066+
},
2067+
),
2068+
};
2069+
// read the string literal until the closing "quote character" followed by a literal quote;
// a closing "quote character" not followed by `'` is part of the content
2070+
let mut s = String::new();
2071+
while let Some(ch) = chars.next() {
2072+
if ch == end_quote_char {
2073+
if let Some('\'') = chars.peek() {
2074+
chars.next(); // ~ consume the quote
2075+
return Ok(as_literal(start_quote_char, s, end_quote_char));
2076+
}
2077+
}
2078+
s.push(ch);
2079+
}
2080+
self.tokenizer_error(error_loc, "Unterminated string literal")
2081+
}
2082+
// ~ not a quote delimited literal: treat `word_prefix` as the start of a regular word
2083+
_ => {
2084+
let s = self.tokenize_word(String::from_iter(word_prefix), chars);
2085+
Ok(Token::make_word(&s, None))
2086+
}
2087+
}
2088+
}
2089+
19972090
/// Read a quoted string.
19982091
fn tokenize_quoted_string(
19992092
&self,

0 commit comments

Comments
 (0)