Merge pull request #7071 from andylokandy/dialect
feat(parser): add mysql dialect
mergify[bot] committed Aug 10, 2022
2 parents dc289e0 + d8c99fb commit a7f2c58
Showing 21 changed files with 102 additions and 45 deletions.
6 changes: 3 additions & 3 deletions common/ast/src/error.rs
@@ -88,7 +88,7 @@ impl<'a> nom::error::ParseError<Input<'a>> for Error<'a> {
span: i[0].clone(),
errors: vec![],
contexts: vec![],
backtrace: i.1,
backtrace: i.2,
}
}

@@ -122,7 +122,7 @@ impl<'a> nom::error::ContextError<Input<'a>> for Error<'a> {

impl<'a> Error<'a> {
pub fn from_error_kind(input: Input<'a>, kind: ErrorKind) -> Self {
let mut inner = input.1.inner.borrow_mut();
let mut inner = input.2.inner.borrow_mut();
if let Some(ref mut inner) = *inner {
match input.0[0].span.start.cmp(&inner.span.span.start) {
Ordering::Equal => {
@@ -147,7 +147,7 @@ impl<'a> Error<'a> {
span: input.0[0].clone(),
errors: vec![kind],
contexts: vec![],
backtrace: input.1,
backtrace: input.2,
}
}
}
32 changes: 28 additions & 4 deletions common/ast/src/input.rs
@@ -23,7 +23,7 @@ use crate::Backtrace;
/// Input tokens slice with a backtrace that records all errors including
/// the optional branch.
#[derive(Debug, Clone, Copy)]
pub struct Input<'a>(pub &'a [Token<'a>], pub &'a Backtrace<'a>);
pub struct Input<'a>(pub &'a [Token<'a>], pub Dialect, pub &'a Backtrace<'a>);

impl<'a> std::ops::Deref for Input<'a> {
type Target = [Token<'a>];
@@ -50,19 +50,19 @@ impl<'a> nom::Offset for Input<'a> {

impl<'a> nom::Slice<Range<usize>> for Input<'a> {
fn slice(&self, range: Range<usize>) -> Self {
Input(&self.0[range], self.1)
Input(&self.0[range], self.1, self.2)
}
}

impl<'a> nom::Slice<RangeTo<usize>> for Input<'a> {
fn slice(&self, range: RangeTo<usize>) -> Self {
Input(&self.0[range], self.1)
Input(&self.0[range], self.1, self.2)
}
}

impl<'a> nom::Slice<RangeFrom<usize>> for Input<'a> {
fn slice(&self, range: RangeFrom<usize>) -> Self {
Input(&self.0[range], self.1)
Input(&self.0[range], self.1, self.2)
}
}

@@ -77,3 +77,27 @@ pub struct WithSpan<'a, T> {
pub(crate) span: Input<'a>,
pub(crate) elem: T,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Dialect {
#[default]
PostgreSQL,
MySQL,
}

impl Dialect {
pub fn is_ident_quote(&self, c: char) -> bool {
match self {
Dialect::MySQL => c == '`',
// TODO: remove '`' quote support once the MySQL handler correctly sets the MySQL dialect.
Dialect::PostgreSQL => c == '"' || c == '`',
}
}

pub fn is_string_quote(&self, c: char) -> bool {
match self {
Dialect::MySQL => c == '\'' || c == '"',
Dialect::PostgreSQL => c == '\'',
}
}
}
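
For a quick sanity check of the quote rules above, here is a minimal usage sketch; it assumes nothing beyond the `Dialect` API added in this file and the `common_ast::Dialect` re-export added below.

use common_ast::Dialect;

fn main() {
    // MySQL: backticks quote identifiers; both ' and " delimit strings.
    assert!(Dialect::MySQL.is_ident_quote('`'));
    assert!(Dialect::MySQL.is_string_quote('"'));
    assert!(!Dialect::MySQL.is_ident_quote('"'));

    // PostgreSQL: double quotes (and, per the TODO above, backticks for now)
    // quote identifiers; only single quotes delimit strings.
    assert!(Dialect::PostgreSQL.is_ident_quote('"'));
    assert!(Dialect::PostgreSQL.is_ident_quote('`'));
    assert!(!Dialect::PostgreSQL.is_string_quote('"'));

    // PostgreSQL is the default dialect.
    assert_eq!(Dialect::default(), Dialect::PostgreSQL);
}
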
1 change: 1 addition & 0 deletions common/ast/src/lib.rs
@@ -29,6 +29,7 @@ pub use visitors::Visitor;
pub use visitors::VisitorMut;

mod input;
pub use input::Dialect;
pub use input::Input;

mod util;
8 changes: 7 additions & 1 deletion common/ast/src/parser/expr.rs
@@ -1080,7 +1080,13 @@ pub fn literal_string(i: Input) -> IResult<String> {
QuotedString
},
|token| {
if token.text().starts_with('\'') {
if token
.text()
.chars()
.next()
.filter(|c| i.1.is_string_quote(*c))
.is_some()
{
let str = &token.text()[1..token.text().len() - 1];
let unescaped =
unescape(str, '\'').ok_or(ErrorKind::Other("invalid escape or unicode"))?;
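
The dialect-aware check in literal_string above boils down to something like the following standalone helper (strip_string_quotes is hypothetical, not part of this diff): accept the token text only when its leading character is a string quote for the active dialect, then strip the surrounding quotes.

use common_ast::Dialect;

// Hypothetical helper mirroring the check in `literal_string`: the token is a
// string literal only if its leading quote matches the dialect's string quotes.
fn strip_string_quotes(text: &str, dialect: Dialect) -> Option<&str> {
    let first = text.chars().next()?;
    if text.len() >= 2 && dialect.is_string_quote(first) {
        Some(&text[1..text.len() - 1])
    } else {
        None
    }
}

// strip_string_quotes(r#""hello""#, Dialect::MySQL)      => Some("hello")
// strip_string_quotes(r#""hello""#, Dialect::PostgreSQL) => None
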
10 changes: 7 additions & 3 deletions common/ast/src/parser/mod.rs
@@ -24,6 +24,7 @@ use common_exception::Result;
use self::expr::subexpr;
use crate::ast::Expr;
use crate::ast::Statement;
use crate::input::Dialect;
use crate::input::Input;
use crate::parser::statement::statement;
use crate::parser::token::Token;
@@ -40,9 +41,10 @@ pub fn tokenize_sql(sql: &str) -> Result<Vec<Token>> {
/// Parse a SQL string into `Statement`s.
pub fn parse_sql<'a>(
sql_tokens: &'a [Token<'a>],
dialect: Dialect,
backtrace: &'a Backtrace<'a>,
) -> Result<(Statement<'a>, Option<String>)> {
match statement(Input(sql_tokens, backtrace)) {
match statement(Input(sql_tokens, dialect, backtrace)) {
Ok((rest, stmts)) if rest[0].kind == TokenKind::EOI => Ok((stmts.stmt, stmts.format)),
Ok((rest, _)) => Err(ErrorCode::SyntaxException(
rest[0].display_error("unable to parse rest of the sql".to_string()),
@@ -57,9 +59,10 @@ pub fn parse_sql<'a>(
/// Parse udf function into Expr
pub fn parse_expr<'a>(
sql_tokens: &'a [Token<'a>],
dialect: Dialect,
backtrace: &'a Backtrace<'a>,
) -> Result<Expr<'a>> {
match expr::expr(Input(sql_tokens, backtrace)) {
match expr::expr(Input(sql_tokens, dialect, backtrace)) {
Ok((rest, expr)) if rest[0].kind == TokenKind::EOI => Ok(expr),
Ok((rest, _)) => Err(ErrorCode::SyntaxException(
rest[0].display_error("unable to parse rest of the sql".to_string()),
@@ -73,10 +76,11 @@ pub fn parse_expr<'a>(

pub fn parse_comma_separated_exprs<'a>(
sql_tokens: &'a [Token<'a>],
dialect: Dialect,
backtrace: &'a Backtrace<'a>,
) -> Result<Vec<Expr<'a>>> {
let mut comma_separated_exprs_parser = comma_separated_list0(subexpr(0));
match comma_separated_exprs_parser(Input(sql_tokens, backtrace)) {
match comma_separated_exprs_parser(Input(sql_tokens, dialect, backtrace)) {
Ok((_rest, exprs)) => Ok(exprs),
Err(nom::Err::Error(err) | nom::Err::Failure(err)) => {
Err(ErrorCode::SyntaxException(err.display_error(())))
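
Every entry point now takes a dialect explicitly. A minimal end-to-end sketch of the updated API, modeled on the test and binder call sites in this diff (the SQL text itself is just an example):

use common_ast::parser::parse_sql;
use common_ast::parser::tokenize_sql;
use common_ast::Backtrace;
use common_ast::Dialect;
use common_exception::Result;

fn parse_with_mysql_dialect() -> Result<()> {
    // Backquoted identifiers are accepted under the MySQL dialect.
    let sql = "SELECT * FROM `db`.`t` ORDER BY `c1`";
    let tokens = tokenize_sql(sql)?;
    let backtrace = Backtrace::new();
    let (_stmt, _format) = parse_sql(&tokens, Dialect::MySQL, &backtrace)?;
    Ok(())
}
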
20 changes: 13 additions & 7 deletions common/ast/src/util.rs
@@ -92,17 +92,23 @@ fn non_reserved_identifier(
),
move |i| {
match_token(QuotedString)(i).and_then(|(i2, token)| {
if token.text().starts_with('\'') {
Err(nom::Err::Error(Error::from_error_kind(
i,
ErrorKind::ExpectToken(Ident),
)))
} else {
if token
.text()
.chars()
.next()
.filter(|c| i.1.is_ident_quote(*c))
.is_some()
{
Ok((i2, Identifier {
span: token.clone(),
name: token.text()[1..token.text().len() - 1].to_string(),
quote: Some(token.text().chars().next().unwrap()),
}))
} else {
Err(nom::Err::Error(Error::from_error_kind(
i,
ErrorKind::ExpectToken(Ident),
)))
}
})
},
@@ -333,7 +339,7 @@ where
.map_err(nom::Err::Error)?;
if let Some(elem) = iter.peek() {
// Rollback parsing footprint on unused expr elements.
input.1.clear();
input.2.clear();
Ok((input.slice(input.offset(&elem.span)..), expr))
} else {
Ok((rest, expr))
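
Reading the two checks together (is_ident_quote in non_reserved_identifier here, is_string_quote in literal_string earlier), the same double-quoted token should be classified per dialect: an identifier under PostgreSQL, a string literal under MySQL. A hedged sketch using the public parse_expr API:

use common_ast::parser::parse_expr;
use common_ast::parser::tokenize_sql;
use common_ast::Backtrace;
use common_ast::Dialect;
use common_exception::Result;

fn classify_double_quoted_token() -> Result<()> {
    let tokens = tokenize_sql(r#""name""#)?;

    // PostgreSQL: `"name"` should parse as a quoted column identifier.
    let pg_backtrace = Backtrace::new();
    let pg_expr = parse_expr(&tokens, Dialect::PostgreSQL, &pg_backtrace)?;
    println!("{:?}", pg_expr);

    // MySQL: `"name"` should parse as a string literal instead.
    let my_backtrace = Backtrace::new();
    let my_expr = parse_expr(&tokens, Dialect::MySQL, &my_backtrace)?;
    println!("{:?}", my_expr);

    Ok(())
}
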
11 changes: 6 additions & 5 deletions common/ast/tests/it/parser.rs
@@ -21,6 +21,7 @@ use common_ast::parser::token::*;
use common_ast::parser::tokenize_sql;
use common_ast::rule;
use common_ast::Backtrace;
use common_ast::Dialect;
use common_ast::DisplayError;
use common_ast::Input;
use common_exception::Result;
@@ -33,7 +34,7 @@ macro_rules! run_parser {
let backtrace = Backtrace::new();
let parser = $parser;
let mut parser = rule! { #parser ~ &EOI };
match parser.parse(Input(&tokens, &backtrace)) {
match parser.parse(Input(&tokens, Dialect::PostgreSQL, &backtrace)) {
Ok((i, (output, _))) => {
assert_eq!(i[0].kind, TokenKind::EOI);
writeln!($file, "---------- Input ----------").unwrap();
@@ -109,7 +110,7 @@ fn test_statement() {
r#"select * from t4;"#,
r#"select * from aa.bb;"#,
r#"select * from a, b, c;"#,
r#"select * from a, b, c order by `db`.`a`.`c1`;"#,
r#"select * from a, b, c order by "db"."a"."c1";"#,
r#"select * from a join b on a.a = b.a;"#,
r#"select * from a left outer join b on a.a = b.a;"#,
r#"select * from a right outer join b on a.a = b.a;"#,
@@ -269,7 +270,7 @@ fn test_statement() {
for case in cases {
let tokens = tokenize_sql(case).unwrap();
let backtrace = Backtrace::new();
let (stmt, fmt) = parse_sql(&tokens, &backtrace).unwrap();
let (stmt, fmt) = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace).unwrap();
writeln!(file, "---------- Input ----------").unwrap();
writeln!(file, "{}", case).unwrap();
writeln!(file, "---------- Output ---------").unwrap();
@@ -325,7 +326,7 @@ fn test_statement_error() {
for case in cases {
let tokens = tokenize_sql(case).unwrap();
let backtrace = Backtrace::new();
let err = parse_sql(&tokens, &backtrace).unwrap_err();
let err = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace).unwrap_err();
writeln!(file, "---------- Input ----------").unwrap();
writeln!(file, "{}", case).unwrap();
writeln!(file, "---------- Output ---------").unwrap();
@@ -426,7 +427,7 @@ fn test_expr() {
r#"1 - -(- - -1)"#,
r#"1 + a * c.d"#,
r#"number % 2"#,
r#"`t`:k1.k2"#,
r#""t":k1.k2"#,
r#"col1 not between 1 and 2"#,
r#"sum(col1)"#,
r#""random"()"#,
6 changes: 3 additions & 3 deletions common/ast/tests/it/testdata/expr.txt
@@ -1116,9 +1116,9 @@ BinaryOp {


---------- Input ----------
`t`:k1.k2
"t":k1.k2
---------- Output ---------
`t`:k1.k2
"t":k1.k2
---------- AST ------------
MapAccess {
span: [
@@ -1139,7 +1139,7 @@ MapAccess {
column: Identifier {
name: "t",
quote: Some(
'`',
'"',
),
span: QuotedString(0..3),
},
10 changes: 5 additions & 5 deletions common/ast/tests/it/testdata/statement.txt
@@ -2027,9 +2027,9 @@ Query(


---------- Input ----------
select * from a, b, c order by `db`.`a`.`c1`;
select * from a, b, c order by "db"."a"."c1";
---------- Output ---------
SELECT * FROM a, b, c ORDER BY `db`.`a`.`c1`
SELECT * FROM a, b, c ORDER BY "db"."a"."c1"
---------- AST ------------
Query(
Query {
@@ -2134,7 +2134,7 @@ Query(
Identifier {
name: "db",
quote: Some(
'`',
'"',
),
span: QuotedString(31..35),
},
@@ -2143,15 +2143,15 @@ Query(
Identifier {
name: "a",
quote: Some(
'`',
'"',
),
span: QuotedString(36..39),
},
),
column: Identifier {
name: "c1",
quote: Some(
'`',
'"',
),
span: QuotedString(40..44),
},
3 changes: 2 additions & 1 deletion common/expression/tests/it/parser.rs
@@ -17,6 +17,7 @@ use common_ast::parser::parse_expr;
use common_ast::parser::token::Token;
use common_ast::parser::tokenize_sql;
use common_ast::Backtrace;
use common_ast::Dialect;
use common_expression::types::DataType;
use common_expression::Literal;
use common_expression::RawExpr;
@@ -25,7 +26,7 @@ use common_expression::Span;
pub fn parse_raw_expr(text: &str, columns: &[(&str, DataType)]) -> RawExpr {
let backtrace = Backtrace::new();
let tokens = tokenize_sql(text).unwrap();
let expr = parse_expr(&tokens, &backtrace).unwrap();
let expr = parse_expr(&tokens, Dialect::PostgreSQL, &backtrace).unwrap();
transform_expr(expr, columns)
}

3 changes: 2 additions & 1 deletion common/functions-v2/tests/it/scalars/parser.rs
@@ -17,6 +17,7 @@ use common_ast::parser::parse_expr;
use common_ast::parser::token::Token;
use common_ast::parser::tokenize_sql;
use common_ast::Backtrace;
use common_ast::Dialect;
use common_expression::types::DataType;
use common_expression::Literal;
use common_expression::RawExpr;
@@ -25,7 +26,7 @@ use common_expression::Span;
pub fn parse_raw_expr(text: &str, columns: &[(&str, DataType)]) -> RawExpr {
let backtrace = Backtrace::new();
let tokens = tokenize_sql(text).unwrap();
let expr = parse_expr(&tokens, &backtrace).unwrap();
let expr = parse_expr(&tokens, Dialect::PostgreSQL, &backtrace).unwrap();
transform_expr(expr, columns)
}

5 changes: 3 additions & 2 deletions query/src/sql/planner/binder/copy.rs
@@ -21,6 +21,7 @@ use common_ast::ast::Statement;
use common_ast::parser::parse_sql;
use common_ast::parser::tokenize_sql;
use common_ast::Backtrace;
use common_ast::Dialect;
use common_exception::ErrorCode;
use common_exception::Result;
use common_meta_types::UserStageInfo;
@@ -325,7 +326,7 @@ impl<'a> Binder {
format!("SELECT * FROM {src_catalog_name}.{src_database_name}.{src_table_name}");
let tokens = tokenize_sql(&subquery)?;
let backtrace = Backtrace::new();
let sub_stmt_msg = parse_sql(&tokens, &backtrace)?;
let sub_stmt_msg = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace)?;
let sub_stmt = sub_stmt_msg.0;
let query = match &sub_stmt {
Statement::Query(query) => {
@@ -370,7 +371,7 @@ impl<'a> Binder {
format!("SELECT * FROM {src_catalog_name}.{src_database_name}.{src_table_name}");
let tokens = tokenize_sql(&subquery)?;
let backtrace = Backtrace::new();
let sub_stmt_msg = parse_sql(&tokens, &backtrace)?;
let sub_stmt_msg = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace)?;
let sub_stmt = sub_stmt_msg.0;
let query = match &sub_stmt {
Statement::Query(query) => {
3 changes: 2 additions & 1 deletion query/src/sql/planner/binder/ddl/table.rs
@@ -23,6 +23,7 @@ use common_ast::parser::parse_sql;
use common_ast::parser::tokenize_sql;
use common_ast::walk_expr_mut;
use common_ast::Backtrace;
use common_ast::Dialect;
use common_datavalues::DataField;
use common_datavalues::DataSchemaRef;
use common_datavalues::DataSchemaRefExt;
@@ -304,7 +305,7 @@ impl<'a> Binder {
};
let tokens = tokenize_sql(query.as_str())?;
let backtrace = Backtrace::new();
let (stmt, _) = parse_sql(&tokens, &backtrace)?;
let (stmt, _) = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace)?;
self.bind_statement(bind_context, &stmt).await
}

(remaining changed files not loaded)

1 comment on commit a7f2c58

@vercel vercel bot commented on a7f2c58 Aug 10, 2022

Successfully deployed to the following URLs:

databend – ./

databend-databend.vercel.app
databend-git-main-databend.vercel.app
databend.vercel.app
databend.rs
