From 07b074b12bd85c2f35f7252dbed11bb549f92d30 Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Fri, 28 Nov 2025 18:45:44 +0100 Subject: [PATCH 1/2] OracleDialect: Initial draft --- examples/cli.rs | 1 + src/ast/query.rs | 9 ++++ src/dialect/mod.rs | 83 ++++++++++++++++++++----------- src/dialect/oracle.rs | 102 ++++++++++++++++++++++++++++++++++++++ src/keywords.rs | 9 ++++ src/parser/mod.rs | 2 +- src/test_utils.rs | 1 + tests/sqlparser_common.rs | 14 ++++-- tests/sqlparser_oracle.rs | 87 ++++++++++++++++++++++++++++++++ 9 files changed, 272 insertions(+), 36 deletions(-) create mode 100644 src/dialect/oracle.rs create mode 100644 tests/sqlparser_oracle.rs diff --git a/examples/cli.rs b/examples/cli.rs index 08a40a6dd..3c4299b20 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -58,6 +58,7 @@ $ cargo run --example cli - [--dialectname] "--clickhouse" => Box::new(ClickHouseDialect {}), "--duckdb" => Box::new(DuckDbDialect {}), "--sqlite" => Box::new(SQLiteDialect {}), + "--oracle" => Box::new(OracleDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {s}"), }; diff --git a/src/ast/query.rs b/src/ast/query.rs index 16fc9ec0e..67972f256 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -174,6 +174,15 @@ impl SetExpr { None } } + + /// If this `SetExpr` is a `SELECT`, returns a mutable [`Select`]. 
+ pub fn as_select_mut(&mut self) -> Option<&mut Select> { + if let Self::Select(select) = self { + Some(&mut **select) + } else { + None + } + } } impl fmt::Display for SetExpr { diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index ef4e1cdde..49a0486da 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -24,6 +24,7 @@ mod generic; mod hive; mod mssql; mod mysql; +mod oracle; mod postgresql; mod redshift; mod snowflake; @@ -45,6 +46,7 @@ pub use self::generic::GenericDialect; pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; +pub use self::oracle::OracleDialect; pub use self::postgresql::PostgreSqlDialect; pub use self::redshift::RedshiftSqlDialect; pub use self::snowflake::SnowflakeDialect; @@ -84,6 +86,26 @@ macro_rules! dialect_is { } } +const DEFAULT_PREC_VALUE_PERIOD: u8 = 100; +const DEFAULT_PREC_VALUE_DOUBLE_COLON: u8 = 50; +const DEFAULT_PREC_VALUE_AT_TZ: u8 = 41; +const DEFAULT_PREC_VALUE_MUL_DIV_MOD_OP: u8 = 40; +const DEFAULT_PREC_VALUE_PLUS_MINUS: u8 = 30; +const DEFAULT_PREC_VALUE_XOR: u8 = 24; +const DEFAULT_PREC_VALUE_AMPERSAND: u8 = 23; +const DEFAULT_PREC_VALUE_CARET: u8 = 22; +const DEFAULT_PREC_VALUE_PIPE: u8 = 21; +const DEFAULT_PREC_VALUE_BETWEEN: u8 = 20; +const DEFAULT_PREC_VALUE_EQ: u8 = 20; +const DEFAULT_PREC_VALUE_LIKE: u8 = 19; +const DEFAULT_PREC_VALUE_IS: u8 = 17; +const DEFAULT_PREC_VALUE_PG_OTHER: u8 = 16; +const DEFAULT_PREC_VALUE_UNARY_NOT: u8 = 15; +const DEFAULT_PREC_VALUE_AND: u8 = 10; +const DEFAULT_PREC_VALUE_OR: u8 = 5; + +const DEFAULT_PREC_VALUE_UNKNOWN: u8 = 0; + /// Encapsulates the differences between SQL implementations. /// /// # SQL Dialects @@ -773,6 +795,36 @@ pub trait Dialect: Debug + Any { } } + /// Decide the lexical Precedence of operators. 
+ /// + /// Uses (APPROXIMATELY) as a reference + fn prec_value(&self, prec: Precedence) -> u8 { + match prec { + Precedence::Period => DEFAULT_PREC_VALUE_PERIOD, + Precedence::DoubleColon => DEFAULT_PREC_VALUE_DOUBLE_COLON, + Precedence::AtTz => DEFAULT_PREC_VALUE_AT_TZ, + Precedence::MulDivModOp => DEFAULT_PREC_VALUE_MUL_DIV_MOD_OP, + Precedence::PlusMinus => DEFAULT_PREC_VALUE_PLUS_MINUS, + Precedence::Xor => DEFAULT_PREC_VALUE_XOR, + Precedence::Ampersand => DEFAULT_PREC_VALUE_AMPERSAND, + Precedence::Caret => DEFAULT_PREC_VALUE_CARET, + Precedence::Pipe => DEFAULT_PREC_VALUE_PIPE, + Precedence::Between => DEFAULT_PREC_VALUE_BETWEEN, + Precedence::Eq => DEFAULT_PREC_VALUE_EQ, + Precedence::Like => DEFAULT_PREC_VALUE_LIKE, + Precedence::Is => DEFAULT_PREC_VALUE_IS, + Precedence::PgOther => DEFAULT_PREC_VALUE_PG_OTHER, + Precedence::UnaryNot => DEFAULT_PREC_VALUE_UNARY_NOT, + Precedence::And => DEFAULT_PREC_VALUE_AND, + Precedence::Or => DEFAULT_PREC_VALUE_OR, + } + } + + /// Returns the precedence when the precedence is otherwise unknown + fn prec_unknown(&self) -> u8 { + DEFAULT_PREC_VALUE_UNKNOWN + } + /// Dialect-specific statement parser override /// /// This method is called to parse the next statement. @@ -796,36 +848,6 @@ pub trait Dialect: Debug + Any { Ok(None) } - /// Decide the lexical Precedence of operators. 
- /// - /// Uses (APPROXIMATELY) as a reference - fn prec_value(&self, prec: Precedence) -> u8 { - match prec { - Precedence::Period => 100, - Precedence::DoubleColon => 50, - Precedence::AtTz => 41, - Precedence::MulDivModOp => 40, - Precedence::PlusMinus => 30, - Precedence::Xor => 24, - Precedence::Ampersand => 23, - Precedence::Caret => 22, - Precedence::Pipe => 21, - Precedence::Between => 20, - Precedence::Eq => 20, - Precedence::Like => 19, - Precedence::Is => 17, - Precedence::PgOther => 16, - Precedence::UnaryNot => 15, - Precedence::And => 10, - Precedence::Or => 5, - } - } - - /// Returns the precedence when the precedence is otherwise unknown - fn prec_unknown(&self) -> u8 { - 0 - } - /// Returns true if this dialect requires the `TABLE` keyword after `DESCRIBE` /// /// Defaults to false. @@ -1260,6 +1282,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef) -> Option Some(Box::new(AnsiDialect {})), "duckdb" => Some(Box::new(DuckDbDialect {})), "databricks" => Some(Box::new(DatabricksDialect {})), + "oracle" => Some(Box::new(OracleDialect {})), _ => None, } } diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs new file mode 100644 index 000000000..b0907baa4 --- /dev/null +++ b/src/dialect/oracle.rs @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use log::debug; + +use crate::{dialect::DEFAULT_PREC_VALUE_MUL_DIV_MOD_OP, tokenizer::Token}; + +use super::Dialect; + +/// A [`Dialect`] for [Oracle Databases](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/index.html) +#[derive(Debug)] +pub struct OracleDialect; + +impl Dialect for OracleDialect { + // ~ appears not to be called anywhere + fn identifier_quote_style(&self, _identifier: &str) -> Option { + Some('"') + } + + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '"' + } + + fn is_identifier_start(&self, ch: char) -> bool { + ch.is_alphabetic() + } + + fn is_identifier_part(&self, ch: char) -> bool { + ch.is_alphanumeric() || ch == '_' || ch == '$' || ch == '#' || ch == '@' + } + + fn supports_outer_join_operator(&self) -> bool { + true + } + + fn supports_connect_by(&self) -> bool { + true + } + + fn supports_execute_immediate(&self) -> bool { + true + } + + fn supports_match_recognize(&self) -> bool { + true + } + + fn supports_window_function_null_treatment_arg(&self) -> bool { + true + } + + fn supports_boolean_literals(&self) -> bool { + false + } + + fn supports_comment_on(&self) -> bool { + true + } + + fn supports_create_table_select(&self) -> bool { + true + } + + fn supports_set_stmt_without_operator(&self) -> bool { + true + } + + fn get_next_precedence( + &self, + _parser: &crate::parser::Parser, + ) -> Option> { + let t = _parser.peek_token(); + debug!("get_next_precedence() {t:?}"); + + match t.token { + Token::StringConcat => { + // ~ overriding the default precedence to the same level as mul-div-mod + // ~ see: https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/About-SQL-Operators.html + Some(Ok(DEFAULT_PREC_VALUE_MUL_DIV_MOD_OP)) + } + _ => None, + } + } + + fn supports_group_by_expr(&self) -> bool { + true + } +} diff --git a/src/keywords.rs b/src/keywords.rs index 
834d34955..827df1cee 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -220,6 +220,7 @@ define_keywords!( COMMITTED, COMMUTATOR, COMPATIBLE, + COMPRESS, COMPRESSION, COMPUPDATE, COMPUTE, @@ -464,6 +465,7 @@ define_keywords!( IAM_ROLE, ICEBERG, ID, + IDENTIFIED, IDENTITY, IDENTITY_INSERT, IF, @@ -567,6 +569,7 @@ define_keywords!( LOG, LOGIN, LOGS, + LONG, LONGBLOB, LONGTEXT, LOWCARDINALITY, @@ -652,6 +655,7 @@ define_keywords!( NFKD, NO, NOBYPASSRLS, + NOCOMPRESS, NOCREATEDB, NOCREATEROLE, NOINHERIT, @@ -675,6 +679,7 @@ define_keywords!( NULLABLE, NULLIF, NULLS, + NUMBER, NUMERIC, NVARCHAR, OBJECT, @@ -741,6 +746,7 @@ define_keywords!( PAST, PATH, PATTERN, + PCTFREE, PER, PERCENT, PERCENTILE_CONT, @@ -913,6 +919,7 @@ define_keywords!( SIGNED, SIMILAR, SIMPLE, + SIZE, SKIP, SLOW, SMALLINT, @@ -974,6 +981,7 @@ define_keywords!( SWAP, SYMMETRIC, SYNC, + SYNONYM, SYSTEM, SYSTEM_TIME, SYSTEM_USER, @@ -1085,6 +1093,7 @@ define_keywords!( VARBINARY, VARBIT, VARCHAR, + VARCHAR2, VARIABLE, VARIABLES, VARYING, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f3daf628a..0ce9739ad 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12228,7 +12228,7 @@ impl<'a> Parser<'a> { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. 
// https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement - if dialect_of!(self is BigQueryDialect | GenericDialect) { + if dialect_of!(self is BigQueryDialect | OracleDialect | GenericDialect) { (vec![], false) } else { let tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; diff --git a/src/test_utils.rs b/src/test_utils.rs index 73d29312b..9ba5960e8 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -291,6 +291,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(DuckDbDialect {}), Box::new(DatabricksDialect {}), Box::new(ClickHouseDialect {}), + Box::new(OracleDialect {}), ]) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 91952b8c0..ccad67e39 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -34,8 +34,8 @@ use sqlparser::ast::TableFactor::{Pivot, Unpivot}; use sqlparser::ast::*; use sqlparser::dialect::{ AnsiDialect, BigQueryDialect, ClickHouseDialect, DatabricksDialect, Dialect, DuckDbDialect, - GenericDialect, HiveDialect, MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, - SQLiteDialect, SnowflakeDialect, + GenericDialect, HiveDialect, MsSqlDialect, MySqlDialect, OracleDialect, PostgreSqlDialect, + RedshiftSqlDialect, SQLiteDialect, SnowflakeDialect, }; use sqlparser::keywords::{Keyword, ALL_KEYWORDS}; use sqlparser::parser::{Parser, ParserError, ParserOptions}; @@ -712,7 +712,9 @@ fn parse_delete_statement() { fn parse_delete_without_from_error() { let sql = "DELETE \"table\" WHERE 1"; - let dialects = all_dialects_except(|d| d.is::() || d.is::()); + let dialects = all_dialects_except(|d| { + d.is::() || d.is::() || d.is::() + }); let res = dialects.parse_sql_statements(sql); assert_eq!( ParserError::ParserError("Expected: FROM, found: WHERE".to_string()), @@ -723,7 +725,9 @@ fn parse_delete_without_from_error() { #[test] fn parse_delete_statement_for_multi_tables() { let sql = "DELETE schema1.table1, schema2.table2 FROM 
schema1.table1 JOIN schema2.table2 ON schema2.table2.col1 = schema1.table1.col1 WHERE schema2.table2.col2 = 1"; - let dialects = all_dialects_except(|d| d.is::() || d.is::()); + let dialects = all_dialects_except(|d| { + d.is::() || d.is::() || d.is::() + }); match dialects.verified_stmt(sql) { Statement::Delete(Delete { tables, @@ -12943,7 +12947,7 @@ fn test_match_recognize_patterns() { fn check(pattern: &str, expect: MatchRecognizePattern) { let select = all_dialects_where(|d| d.supports_match_recognize()).verified_only_select(&format!( - "SELECT * FROM my_table MATCH_RECOGNIZE(PATTERN ({pattern}) DEFINE DUMMY AS true)" // "select * from my_table match_recognize (" + "SELECT * FROM my_table MATCH_RECOGNIZE(PATTERN ({pattern}) DEFINE DUMMY AS 1 = 1)" // "select * from my_table match_recognize (" )); let TableFactor::MatchRecognize { pattern: actual, .. diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs new file mode 100644 index 000000000..cf94fbcb5 --- /dev/null +++ b/tests/sqlparser_oracle.rs @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![warn(clippy::all)] +//! Test SQL syntax, specific to [sqlparser::dialect::OracleDialect]. 
+ +extern crate core; + +#[cfg(test)] +use pretty_assertions::assert_eq; + +use sqlparser::{ + ast::{Expr, SelectItem, Value}, + dialect::OracleDialect, +}; +#[cfg(test)] +use test_utils::TestedDialects; + +mod test_utils; + +#[test] +fn muldiv_have_same_precedence_as_strconcat() { + // ~ oracle: `*`, `/`, and `||` have the same precedence and parse from left to right + for op in ["*", "/"] { + let sql = format!("SELECT 1 {op} 2 || 'asdf' FROM dual"); + let mut query = oracle_dialect().verified_query(&sql); + + nest_binary_ops(&mut query.body.as_select_mut().expect("not a SELECT").projection[0]); + assert_eq!( + &format!("{query}"), + &format!("SELECT ((1 {op} 2) || 'asdf') FROM dual") + ); + } +} + +#[test] +fn plusminus_have_lower_precedence_than_strconcat() { + // ~ oracle: `||` has higher precedence than `+` or `-` + for op in ["+", "-"] { + let sql = format!("SELECT 1 {op} 2 || 'asdf' FROM dual"); + let mut query = oracle_dialect().verified_query(&sql); + + nest_binary_ops(&mut query.body.as_select_mut().expect("not a SELECT").projection[0]); + assert_eq!( + &format!("{query}"), + &format!("SELECT (1 {op} (2 || 'asdf')) FROM dual") + ); + } +} + +fn oracle_dialect() -> TestedDialects { + TestedDialects::new(vec![Box::new(OracleDialect)]) +} + +/// Wraps [Expr::BinaryExpr]s in `item` with a [Expr::Nested] recursively. 
+fn nest_binary_ops(item: &mut SelectItem) { + // ~ ideally, we could use `VisitorMut` at this point + fn nest(expr: &mut Expr) { + // ~ ideally we could use VisitorMut here + if let Expr::BinaryOp { left, op: _, right } = expr { + nest(&mut *left); + nest(&mut *right); + let inner = std::mem::replace(expr, Expr::Value(Value::Null.into())); + *expr = Expr::Nested(Box::new(inner)); + } + } + match item { + SelectItem::UnnamedExpr(expr) => nest(expr), + SelectItem::ExprWithAlias { expr, alias: _ } => nest(expr), + SelectItem::QualifiedWildcard(_, _) => {} + SelectItem::Wildcard(_) => {} + } +} From 331e05615b34327b0d96b5893d223f6ca3b19bdf Mon Sep 17 00:00:00 2001 From: Petr Novotnik Date: Sun, 30 Nov 2025 19:30:35 +0100 Subject: [PATCH 2/2] Supported quote delimited strings --- src/ast/value.rs | 10 +++ src/parser/mod.rs | 10 +++ src/tokenizer.rs | 97 +++++++++++++++++++++- tests/sqlparser_oracle.rs | 169 +++++++++++++++++++++++++++++++++++++- 4 files changed, 282 insertions(+), 4 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index fdfa6a674..f4d05c31f 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -167,6 +167,12 @@ pub enum Value { TripleDoubleQuotedRawStringLiteral(String), /// N'string value' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + QuoteDelimitedStringLiteral(char, String, char), + /// "National" quote delimited literal.
Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + NationalQuoteDelimitedStringLiteral(char, String, char), /// X'hex value' HexStringLiteral(String), @@ -205,6 +211,8 @@ impl Value { | Value::EscapedStringLiteral(s) | Value::UnicodeStringLiteral(s) | Value::NationalStringLiteral(s) + | Value::QuoteDelimitedStringLiteral(_, s, _) + | Value::NationalQuoteDelimitedStringLiteral(_, s, _) | Value::HexStringLiteral(s) => Some(s), Value::DollarQuotedString(s) => Some(s.value), _ => None, @@ -242,6 +250,8 @@ impl fmt::Display for Value { Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), + Value::QuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "Q'{q1}{s}{q2}'"), + Value::NationalQuoteDelimitedStringLiteral(q1, s, q2) => write!(f, "NQ'{q1}{s}{q2}'"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0ce9739ad..f37e261d0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1704,6 +1704,8 @@ impl<'a> Parser<'a> { | Token::TripleSingleQuotedRawStringLiteral(_) | Token::TripleDoubleQuotedRawStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_, _, _) + | Token::NationalQuoteDelimitedStringLiteral(_, _, _) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) @@ -2716,6 +2718,8 @@ impl<'a> Parser<'a> { | Token::EscapedStringLiteral(_) | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_, _, _) + | Token::NationalQuoteDelimitedStringLiteral(_, _, _) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), _ =>
self.expected( "either filler, WITH, or WITHOUT in LISTAGG", @@ -10483,6 +10487,12 @@ impl<'a> Parser<'a> { Token::NationalStringLiteral(ref s) => { ok_value(Value::NationalStringLiteral(s.to_string())) } + Token::QuoteDelimitedStringLiteral(q1, s, q2) => { + ok_value(Value::QuoteDelimitedStringLiteral(q1, s, q2)) + } + Token::NationalQuoteDelimitedStringLiteral(q1, s, q2) => { + ok_value(Value::NationalQuoteDelimitedStringLiteral(q1, s, q2)) + } Token::EscapedStringLiteral(ref s) => { ok_value(Value::EscapedStringLiteral(s.to_string())) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54a158c1f..fe5002b7a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,10 +29,10 @@ use alloc::{ vec, vec::Vec, }; -use core::iter::Peekable; use core::num::NonZeroU8; use core::str::Chars; use core::{cmp, fmt}; +use core::{iter::Peekable, str}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -40,11 +40,11 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::dialect::Dialect; use crate::dialect::{ BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect, SnowflakeDialect, }; +use crate::dialect::{Dialect, OracleDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; use crate::{ast::DollarQuotedString, dialect::HiveDialect}; @@ -98,6 +98,12 @@ pub enum Token { TripleDoubleQuotedRawStringLiteral(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + QuoteDelimitedStringLiteral(char, String, char), + /// "National" quote delimited literal.
Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html) + NationalQuoteDelimitedStringLiteral(char, String, char), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), /// Unicode string literal: i.e: U&'first \000A second' @@ -292,6 +298,10 @@ impl fmt::Display for Token { Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""), Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), + Token::QuoteDelimitedStringLiteral(q1, ref s, q2) => write!(f, "Q'{q1}{s}{q2}'"), + Token::NationalQuoteDelimitedStringLiteral(q1, ref s, q2) => { + write!(f, "NQ'{q1}{s}{q2}'") + } Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), @@ -1032,6 +1042,16 @@ impl<'a> Tokenizer<'a> { self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } + Some(&q @ 'q') | Some(&q @ 'Q') if dialect_of!(self is OracleDialect | GenericDialect) => + { + chars.next(); // consume and check the next char + self.tokenize_word_or_quote_delimited_string( + chars, + &[n, q], + Token::NationalQuoteDelimitedStringLiteral, + ) + .map(Some) + } _ => { // regular identifier starting with an "N" let s = self.tokenize_word(n, chars); @@ -1039,6 +1059,15 @@ impl<'a> Tokenizer<'a> { } } } + q @ 'Q' | q @ 'q' if dialect_of!(self is OracleDialect | GenericDialect) => { + chars.next(); // consume and check the next char + self.tokenize_word_or_quote_delimited_string( + chars, + &[q], + Token::QuoteDelimitedStringLiteral, + ) + .map(Some) + } // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. 
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { let starting_loc = chars.location(); @@ -1994,6 +2023,70 @@ impl<'a> Tokenizer<'a> { ) } + /// Reads a quote delimited string without "backslash escaping" or a word + /// depending on whether `chars.next()` delivers a `'`. + /// + /// See + fn tokenize_word_or_quote_delimited_string( + &self, + chars: &mut State, + // the prefix that introduced the possible literal or word, + // e.g. "Q" or "nq" + word_prefix: &[char], + // turns an identified quote string literal, + // ie. `(start-quote-char, string-literal, end-quote-char)` + // into a token + as_literal: fn(char, String, char) -> Token, + ) -> Result { + match chars.peek() { + Some('\'') => { + chars.next(); + // ~ determine the "quote character(s)" + let error_loc = chars.location(); + let (start_quote_char, end_quote_char) = match chars.next() { + // ~ "newline" is not allowed by Oracle's SQL Reference, + // but works with sql*plus nevertheless + None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { + return self.tokenizer_error( + error_loc, + format!( + "Invalid space, tab, newline, or EOF after '{}''.", + String::from_iter(word_prefix) + ), + ); + } + Some(c) => ( + c, + match c { + '[' => ']', + '{' => '}', + '<' => '>', + '(' => ')', + c => c, + }, + ), + }; + // read the string literal until the "quote character" is followed by a literal quote + let mut s = String::new(); + while let Some(ch) = chars.next() { + if ch == end_quote_char { + if let Some('\'') = chars.peek() { + chars.next(); // ~ consume the quote + return Ok(as_literal(start_quote_char, s, end_quote_char)); + } + } + s.push(ch); + } + self.tokenizer_error(error_loc, "Unterminated string literal") + } + // ~ not a literal introduced with `word_prefix`; assume a regular word + _ => { + let s = self.tokenize_word(String::from_iter(word_prefix), chars); + Ok(Token::make_word(&s, None)) + } + } + } + /// Read a quoted string.
fn tokenize_quoted_string( &self, diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index cf94fbcb5..5c2956f2e 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -24,11 +24,12 @@ extern crate core; use pretty_assertions::assert_eq; use sqlparser::{ - ast::{Expr, SelectItem, Value}, + ast::{Expr, Ident, SelectItem, Value}, dialect::OracleDialect, + tokenizer::Span, }; #[cfg(test)] -use test_utils::TestedDialects; +use test_utils::{expr_from_projection, TestedDialects}; mod test_utils; @@ -62,6 +63,170 @@ fn plusminus_have_lower_precedence_than_strconcat() { } } +#[test] +fn parse_quote_delimited_string() { + let sql = "SELECT Q'.abc.', \ + Q'Xab'cX', \ + Q'|abc'''|', \ + Q'{abc}d}', \ + Q'[]abc[]', \ + Q'', \ + Q'<<', \ + Q'('abc'('abc)', \ + Q'(abc'def))', \ + Q'(abc'def)))' \ + FROM dual"; + let select = oracle_dialect().verified_only_select(sql); + assert_eq!(10, select.projection.len()); + assert_eq!( + &Expr::Value(Value::QuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span()), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('X', "ab'c".into(), 'X')).with_empty_span() + ), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('|', "abc'''".into(), '|')).with_empty_span() + ), + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('{', "abc}d".into(), '}')).with_empty_span() + ), + expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('[', "]abc[".into(), ']')).with_empty_span() + ), + expr_from_projection(&select.projection[4]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('<', "a'bc".into(), '>')).with_empty_span() + ), + expr_from_projection(&select.projection[5]) + ); + assert_eq!( + &Expr::Value( + 
(Value::QuoteDelimitedStringLiteral('<', "<')).with_empty_span() + ), + expr_from_projection(&select.projection[6]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('(', "'abc'('abc".into(), ')')).with_empty_span() + ), + expr_from_projection(&select.projection[7]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('(', "abc'def)".into(), ')')).with_empty_span() + ), + expr_from_projection(&select.projection[8]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral('(', "abc'def))".into(), ')')).with_empty_span() + ), + expr_from_projection(&select.projection[9]) + ); +} + +#[test] +fn parse_quote_delimited_string_lowercase() { + let sql = "select q'!a'b'c!d!' from dual"; + let select = + oracle_dialect().verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual"); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!').with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_quote_delimited_string_but_is_a_word() { + let sql = "SELECT q, quux, q.abc FROM dual q"; + let select = oracle_dialect().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "q")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "quux")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "q"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} + +#[test] +fn parse_national_quote_delimited_string() { + let sql = "SELECT NQ'.abc.' 
FROM dual"; + let select = oracle_dialect().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral('.', "abc".into(), '.').with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_national_quote_delimited_string_lowercase() { + for prefix in ["nq", "Nq", "nQ", "NQ"] { + let select = oracle_dialect().verified_only_select_with_canonical( + &format!("select {prefix}'!a'b'c!d!' from dual"), + "SELECT NQ'!a'b'c!d!' FROM dual", + ); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral('!', "a'b'c!d".into(), '!') + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + } +} + +#[test] +fn parse_national_quote_delimited_string_but_is_a_word() { + let sql = "SELECT nq, nqoo, nq.abc FROM dual q"; + let select = oracle_dialect().verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nq")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nqoo")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "nq"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} + fn oracle_dialect() -> TestedDialects { TestedDialects::new(vec![Box::new(OracleDialect)]) }