Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a dialect-specific rule for unparsing an identifier with or without quotes #10573

Merged
merged 16 commits into from
May 22, 2024
Merged
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ object_store = { version = "0.9.1", default-features = false }
parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
rand = "0.8"
regex = "1.8"
rstest = "0.19.0"
serde_json = "1"
sqlparser = { version = "0.45.0", features = ["visitor"] }
Expand Down
1 change: 1 addition & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 2 additions & 14 deletions datafusion-examples/examples/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ use datafusion_sql::unparser::{plan_to_sql, Unparser};
async fn main() -> Result<()> {
// See how to evaluate expressions
simple_expr_to_sql_demo()?;
simple_expr_to_sql_demo_no_escape()?;
simple_expr_to_sql_demo_escape_mysql_style()?;
simple_plan_to_sql_demo().await?;
round_trip_plan_to_sql_demo().await?;
Expand All @@ -61,17 +60,6 @@ async fn main() -> Result<()> {
fn simple_expr_to_sql_demo() -> Result<()> {
let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
let sql = expr_to_sql(&expr)?.to_string();
assert_eq!(sql, r#"(("a" < 5) OR ("a" = 8))"#);
Ok(())
}

/// DataFusion can convert expressions to SQL without escaping column names using
/// using a custom dialect and an explicit unparser
fn simple_expr_to_sql_demo_no_escape() -> Result<()> {
let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
let dialect = CustomDialect::new(None);
let unparser = Unparser::new(&dialect);
let sql = unparser.expr_to_sql(&expr)?.to_string();
assert_eq!(sql, r#"((a < 5) OR (a = 8))"#);
Ok(())
}
Expand Down Expand Up @@ -106,7 +94,7 @@ async fn simple_plan_to_sql_demo() -> Result<()> {

assert_eq!(
sql,
r#"SELECT "?table?"."id", "?table?"."int_col", "?table?"."double_col", "?table?"."date_string_col" FROM "?table?""#
r#"SELECT "?table?".id, "?table?".int_col, "?table?".double_col, "?table?".date_string_col FROM "?table?""#
);

Ok(())
Expand Down Expand Up @@ -145,7 +133,7 @@ async fn round_trip_plan_to_sql_demo() -> Result<()> {
let sql = plan_to_sql(df.logical_plan())?.to_string();
assert_eq!(
sql,
r#"SELECT "alltypes_plain"."int_col", "alltypes_plain"."double_col", CAST("alltypes_plain"."date_string_col" AS VARCHAR) FROM "alltypes_plain" WHERE (("alltypes_plain"."id" > 1) AND ("alltypes_plain"."tinyint_col" < "alltypes_plain"."double_col"))"#
r#"SELECT alltypes_plain.int_col, alltypes_plain.double_col, CAST(alltypes_plain.date_string_col AS VARCHAR) FROM alltypes_plain WHERE ((alltypes_plain.id > 1) AND (alltypes_plain.tinyint_col < alltypes_plain.double_col))"#
);

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ postgres-protocol = "0.6.4"
postgres-types = { version = "0.2.4", features = ["derive", "with-chrono-0_4"] }
rand = { workspace = true, features = ["small_rng"] }
rand_distr = "0.4.3"
regex = "1.5.4"
regex = { workspace = true }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that is certainly nice to use the same version of regex everywhere 👍

rstest = { workspace = true }
rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
serde_json = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ itertools = { workspace = true }
log = { workspace = true }
md-5 = { version = "^0.10.0", optional = true }
rand = { workspace = true }
regex = { version = "1.8", optional = true }
regex = { worksapce = true, optional = true }
sha2 = { version = "^0.10.1", optional = true }
unicode-segmentation = { version = "^1.7.1", optional = true }
uuid = { version = "1.7", features = ["v4"], optional = true }
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ itertools = { workspace = true, features = ["use_std"] }
log = { workspace = true }
paste = "^1.0"
petgraph = "0.6.2"
regex = { version = "1.8", optional = true }
regex = { workspace = true, optional = true }

[dev-dependencies]
arrow = { workspace = true, features = ["test_utils"] }
Expand Down
1 change: 1 addition & 0 deletions datafusion/sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ arrow-schema = { workspace = true }
datafusion-common = { workspace = true, default-features = true }
datafusion-expr = { workspace = true }
log = { workspace = true }
regex = { workspace = true }
sqlparser = { workspace = true }
strum = { version = "0.26.1", features = ["derive"] }

Expand Down
29 changes: 21 additions & 8 deletions datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,41 +15,54 @@
// specific language governing permissions and limitations
// under the License.

/// Dialect is used to capture dialect specific syntax.
use regex::Regex;
use sqlparser::keywords::ALL_KEYWORDS;

/// `Dialect` to use for Unparsing
///
/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
/// but this behavior can be overridden as needed
/// Note: this trait will eventually be replaced by the Dialect in the SQLparser package
///
/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
pub trait Dialect {
fn identifier_quote_style(&self) -> Option<char>;
fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
}
pub struct DefaultDialect {}

impl Dialect for DefaultDialect {
fn identifier_quote_style(&self) -> Option<char> {
Some('"')
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
if ALL_KEYWORDS.contains(&identifier.to_uppercase().as_str())
|| !identifier_regex.is_match(identifier)
{
Some('"')
} else {
None
}
}
}

pub struct PostgreSqlDialect {}

impl Dialect for PostgreSqlDialect {
fn identifier_quote_style(&self) -> Option<char> {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
Some('"')
}
}

pub struct MySqlDialect {}

impl Dialect for MySqlDialect {
fn identifier_quote_style(&self) -> Option<char> {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
Some('`')
}
}

pub struct SqliteDialect {}

impl Dialect for SqliteDialect {
fn identifier_quote_style(&self) -> Option<char> {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
Some('`')
}
}
Expand All @@ -67,7 +80,7 @@ impl CustomDialect {
}

impl Dialect for CustomDialect {
fn identifier_quote_style(&self) -> Option<char> {
fn identifier_quote_style(&self, _: &str) -> Option<char> {
self.identifier_quote_style
}
}