Skip to content

Commit

Permalink
Implement a dialect-specific rule for unparsing an identifier with or…
Browse files Browse the repository at this point in the history
… without quotes (#10573)

* add ident needs quote check

* implement the check for default dialect and fix tests

* add test for need-quoted cases

* update cargo lock

* fomrat cargo toml

* fix the example test

* move regex to top level

* add comments for new_ident_quoted_if_needs func

* fix typo and add test for space

* fix example test

* fix example test

* fix the test fail

* remove unused example and modified comments

* fix typo

* follow the latest Dialect trait in sqlparser

* fix the parameter name
  • Loading branch information
goldmedal committed May 22, 2024
1 parent 0355713 commit 7bd4b53
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 83 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ object_store = { version = "0.9.1", default-features = false }
parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
rand = "0.8"
regex = "1.8"
rstest = "0.19.0"
serde_json = "1"
sqlparser = { version = "0.45.0", features = ["visitor"] }
Expand Down
1 change: 1 addition & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 2 additions & 14 deletions datafusion-examples/examples/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ use datafusion_sql::unparser::{plan_to_sql, Unparser};
async fn main() -> Result<()> {
// See how to evaluate expressions
simple_expr_to_sql_demo()?;
simple_expr_to_sql_demo_no_escape()?;
simple_expr_to_sql_demo_escape_mysql_style()?;
simple_plan_to_sql_demo().await?;
round_trip_plan_to_sql_demo().await?;
Expand All @@ -61,17 +60,6 @@ async fn main() -> Result<()> {
fn simple_expr_to_sql_demo() -> Result<()> {
let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
let sql = expr_to_sql(&expr)?.to_string();
assert_eq!(sql, r#"(("a" < 5) OR ("a" = 8))"#);
Ok(())
}

/// DataFusion can convert expressions to SQL without escaping column names using
/// using a custom dialect and an explicit unparser
fn simple_expr_to_sql_demo_no_escape() -> Result<()> {
let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
let dialect = CustomDialect::new(None);
let unparser = Unparser::new(&dialect);
let sql = unparser.expr_to_sql(&expr)?.to_string();
assert_eq!(sql, r#"((a < 5) OR (a = 8))"#);
Ok(())
}
Expand Down Expand Up @@ -106,7 +94,7 @@ async fn simple_plan_to_sql_demo() -> Result<()> {

assert_eq!(
sql,
r#"SELECT "?table?"."id", "?table?"."int_col", "?table?"."double_col", "?table?"."date_string_col" FROM "?table?""#
r#"SELECT "?table?".id, "?table?".int_col, "?table?".double_col, "?table?".date_string_col FROM "?table?""#
);

Ok(())
Expand Down Expand Up @@ -145,7 +133,7 @@ async fn round_trip_plan_to_sql_demo() -> Result<()> {
let sql = plan_to_sql(df.logical_plan())?.to_string();
assert_eq!(
sql,
r#"SELECT "alltypes_plain"."int_col", "alltypes_plain"."double_col", CAST("alltypes_plain"."date_string_col" AS VARCHAR) FROM "alltypes_plain" WHERE (("alltypes_plain"."id" > 1) AND ("alltypes_plain"."tinyint_col" < "alltypes_plain"."double_col"))"#
r#"SELECT alltypes_plain.int_col, alltypes_plain.double_col, CAST(alltypes_plain.date_string_col AS VARCHAR) FROM alltypes_plain WHERE ((alltypes_plain.id > 1) AND (alltypes_plain.tinyint_col < alltypes_plain.double_col))"#
);

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ postgres-protocol = "0.6.4"
postgres-types = { version = "0.2.4", features = ["derive", "with-chrono-0_4"] }
rand = { workspace = true, features = ["small_rng"] }
rand_distr = "0.4.3"
regex = "1.5.4"
regex = { workspace = true }
rstest = { workspace = true }
rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
serde_json = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ itertools = { workspace = true }
log = { workspace = true }
md-5 = { version = "^0.10.0", optional = true }
rand = { workspace = true }
regex = { version = "1.8", optional = true }
regex = { worksapce = true, optional = true }
sha2 = { version = "^0.10.1", optional = true }
unicode-segmentation = { version = "^1.7.1", optional = true }
uuid = { version = "1.7", features = ["v4"], optional = true }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ itertools = { workspace = true, features = ["use_std"] }
log = { workspace = true }
paste = "^1.0"
petgraph = "0.6.2"
regex = { version = "1.8", optional = true }
regex = { workspace = true, optional = true }

[dev-dependencies]
arrow = { workspace = true, features = ["test_utils"] }
Expand Down
1 change: 1 addition & 0 deletions datafusion/sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ arrow-schema = { workspace = true }
datafusion-common = { workspace = true, default-features = true }
datafusion-expr = { workspace = true }
log = { workspace = true }
regex = { workspace = true }
sqlparser = { workspace = true }
strum = { version = "0.26.1", features = ["derive"] }

Expand Down
29 changes: 21 additions & 8 deletions datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,41 +15,54 @@
// specific language governing permissions and limitations
// under the License.

/// Dialect is used to capture dialect specific syntax.
use regex::Regex;
use sqlparser::keywords::ALL_KEYWORDS;

/// `Dialect` to use for Unparsing
///
/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
/// but this behavior can be overridden as needed
/// Note: this trait will eventually be replaced by the Dialect in the SQLparser package
///
/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
pub trait Dialect {
fn identifier_quote_style(&self) -> Option<char>;
fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
}
pub struct DefaultDialect {}

impl Dialect for DefaultDialect {
fn identifier_quote_style(&self) -> Option<char> {
Some('"')
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
if ALL_KEYWORDS.contains(&identifier.to_uppercase().as_str())
|| !identifier_regex.is_match(identifier)
{
Some('"')
} else {
None
}
}
}

pub struct PostgreSqlDialect {}

impl Dialect for PostgreSqlDialect {
fn identifier_quote_style(&self) -> Option<char> {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
Some('"')
}
}

pub struct MySqlDialect {}

impl Dialect for MySqlDialect {
fn identifier_quote_style(&self) -> Option<char> {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
Some('`')
}
}

pub struct SqliteDialect {}

impl Dialect for SqliteDialect {
fn identifier_quote_style(&self) -> Option<char> {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
Some('`')
}
}
Expand All @@ -67,7 +80,7 @@ impl CustomDialect {
}

impl Dialect for CustomDialect {
fn identifier_quote_style(&self) -> Option<char> {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
self.identifier_quote_style
}
}
Loading

0 comments on commit 7bd4b53

Please sign in to comment.