apache · alamb · May 22, 2024 · May 19, 2024 · May 19, 2024 · May 19, 2024
diff --git a/Cargo.toml b/Cargo.toml
@@ -105,6 +105,7 @@ object_store = { version = "0.9.1", default-features = false }
 parking_lot = "0.12"
 parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
 rand = "0.8"
+regex = "1.8"
 rstest = "0.19.0"
 serde_json = "1"
 sqlparser = { version = "0.45.0", features = ["visitor"] }

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs
@@ -49,7 +49,6 @@ use datafusion_sql::unparser::{plan_to_sql, Unparser};
 async fn main() -> Result<()> {
     // See how to evaluate expressions
     simple_expr_to_sql_demo()?;
-    simple_expr_to_sql_demo_no_escape()?;
     simple_expr_to_sql_demo_escape_mysql_style()?;
     simple_plan_to_sql_demo().await?;
     round_trip_plan_to_sql_demo().await?;
@@ -61,17 +60,6 @@ async fn main() -> Result<()> {
 fn simple_expr_to_sql_demo() -> Result<()> {
     let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
     let sql = expr_to_sql(&expr)?.to_string();
-    assert_eq!(sql, r#"(("a" < 5) OR ("a" = 8))"#);
-    Ok(())
-}
-
-/// DataFusion can convert expressions to SQL without escaping column names using
-/// using a custom dialect and an explicit unparser
-fn simple_expr_to_sql_demo_no_escape() -> Result<()> {
-    let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
-    let dialect = CustomDialect::new(None);
-    let unparser = Unparser::new(&dialect);
-    let sql = unparser.expr_to_sql(&expr)?.to_string();
     assert_eq!(sql, r#"((a < 5) OR (a = 8))"#);
     Ok(())
 }
@@ -106,7 +94,7 @@ async fn simple_plan_to_sql_demo() -> Result<()> {
 
     assert_eq!(
         sql,
-        r#"SELECT "?table?"."id", "?table?"."int_col", "?table?"."double_col", "?table?"."date_string_col" FROM "?table?""#
+        r#"SELECT "?table?".id, "?table?".int_col, "?table?".double_col, "?table?".date_string_col FROM "?table?""#
     );
 
     Ok(())
@@ -145,7 +133,7 @@ async fn round_trip_plan_to_sql_demo() -> Result<()> {
     let sql = plan_to_sql(df.logical_plan())?.to_string();
     assert_eq!(
         sql,
-        r#"SELECT "alltypes_plain"."int_col", "alltypes_plain"."double_col", CAST("alltypes_plain"."date_string_col" AS VARCHAR) FROM "alltypes_plain" WHERE (("alltypes_plain"."id" > 1) AND ("alltypes_plain"."tinyint_col" < "alltypes_plain"."double_col"))"#
+        r#"SELECT alltypes_plain.int_col, alltypes_plain.double_col, CAST(alltypes_plain.date_string_col AS VARCHAR) FROM alltypes_plain WHERE ((alltypes_plain.id > 1) AND (alltypes_plain.tinyint_col < alltypes_plain.double_col))"#
     );
 
     Ok(())

diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
@@ -145,7 +145,7 @@ postgres-protocol = "0.6.4"
 postgres-types = { version = "0.2.4", features = ["derive", "with-chrono-0_4"] }
 rand = { workspace = true, features = ["small_rng"] }
 rand_distr = "0.4.3"
-regex = "1.5.4"
+regex = { workspace = true }
 rstest = { workspace = true }
 rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
 serde_json = { workspace = true }

diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
@@ -80,7 +80,7 @@ itertools = { workspace = true }
 log = { workspace = true }
 md-5 = { version = "^0.10.0", optional = true }
 rand = { workspace = true }
-regex = { version = "1.8", optional = true }
+regex = { worksapce = true, optional = true }
 sha2 = { version = "^0.10.1", optional = true }
 unicode-segmentation = { version = "^1.7.1", optional = true }
 uuid = { version = "1.7", features = ["v4"], optional = true }

diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
@@ -66,7 +66,7 @@ itertools = { workspace = true, features = ["use_std"] }
 log = { workspace = true }
 paste = "^1.0"
 petgraph = "0.6.2"
-regex = { version = "1.8", optional = true }
+regex = { workspace = true, optional = true }
 
 [dev-dependencies]
 arrow = { workspace = true, features = ["test_utils"] }

diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml
@@ -47,6 +47,7 @@ arrow-schema = { workspace = true }
 datafusion-common = { workspace = true, default-features = true }
 datafusion-expr = { workspace = true }
 log = { workspace = true }
+regex = { workspace = true }
 sqlparser = { workspace = true }
 strum = { version = "0.26.1", features = ["derive"] }
 

diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs
@@ -15,41 +15,54 @@
 // specific language governing permissions and limitations
 // under the License.
 
-/// Dialect is used to capture dialect specific syntax.
+use regex::Regex;
+use sqlparser::keywords::ALL_KEYWORDS;
+
+/// `Dialect` to use for Unparsing
+///
+/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
+/// but this behavior can be overridden as needed
 /// Note: this trait will eventually be replaced by the Dialect in the SQLparser package
 ///
 /// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
 pub trait Dialect {
-    fn identifier_quote_style(&self) -> Option<char>;
+    fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
 }
 pub struct DefaultDialect {}
 
 impl Dialect for DefaultDialect {
-    fn identifier_quote_style(&self) -> Option<char> {
-        Some('"')
+    fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
+        let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
+        if ALL_KEYWORDS.contains(&identifier.to_uppercase().as_str())
+            || !identifier_regex.is_match(identifier)
+        {
+            Some('"')
+        } else {
+            None
+        }
     }
 }
 
 pub struct PostgreSqlDialect {}
 
 impl Dialect for PostgreSqlDialect {
-    fn identifier_quote_style(&self) -> Option<char> {
+    fn identifier_quote_style(&self, _: &str) -> Option<char> {
         Some('"')
     }
 }
 
 pub struct MySqlDialect {}
 
 impl Dialect for MySqlDialect {
-    fn identifier_quote_style(&self) -> Option<char> {
+    fn identifier_quote_style(&self, _: &str) -> Option<char> {
         Some('`')
     }
 }
 
 pub struct SqliteDialect {}
 
 impl Dialect for SqliteDialect {
-    fn identifier_quote_style(&self) -> Option<char> {
+    fn identifier_quote_style(&self, _: &str) -> Option<char> {
         Some('`')
     }
 }
@@ -67,7 +80,7 @@ impl CustomDialect {
 }
 
 impl Dialect for CustomDialect {
-    fn identifier_quote_style(&self) -> Option<char> {
+    fn identifier_quote_style(&self, _: &str) -> Option<char> {
         self.identifier_quote_style
     }
 }