Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement backtracking for the parser and add a human-friendly pretty printer for errors #4045

Merged
merged 13 commits into from Feb 6, 2022
31 changes: 29 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion common/ast/Cargo.toml
Expand Up @@ -16,17 +16,21 @@ common-exception = { path = "../exception" }
common-functions = { path = "../functions" }

# Github dependencies
# TODO (andylokandy): Use the version from crates.io once
# https://github.com/brendanzab/codespan/pull/331 is released.
codespan-reporting = { git = "https://github.com/brendanzab/codespan", rev = "c84116f5" }
sqlparser = { git = "https://github.com/datafuse-extras/sqlparser-rs", rev = "c33837e" }

# Crates.io dependencies
async-trait = "0.1.52"
logos = "0.12"
nom = "7"
nom-rule = "0.1"
nom-rule = "0.2"
once_cell = "1.9.0"
thiserror = "1.0.30"
pratt = "0.3"

[dev-dependencies]
goldenfile = "1"
pretty_assertions = "1.0.0"
common-base = { path = "../base" }
2 changes: 0 additions & 2 deletions common/ast/src/lib.rs
Expand Up @@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#![feature(trait_alias)]

pub mod error;
pub mod parser;
pub mod udfs;
2 changes: 1 addition & 1 deletion common/ast/src/parser/ast/expression.rs
Expand Up @@ -322,7 +322,7 @@ impl Display for Literal {
write!(f, "{}", val)
}
Literal::String(val) => {
write!(f, "\"{}\"", val)
write!(f, "\'{}\'", val)
}
Literal::Boolean(val) => {
if *val {
Expand Down
152 changes: 152 additions & 0 deletions common/ast/src/parser/rule/error.rs
@@ -0,0 +1,152 @@
// Copyright 2022 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use codespan_reporting::diagnostic::Diagnostic;
use codespan_reporting::diagnostic::Label;
use codespan_reporting::files::SimpleFile;
use codespan_reporting::term;
use codespan_reporting::term::termcolor::Buffer;
use codespan_reporting::term::Chars;
use codespan_reporting::term::Config;

use crate::parser::rule::util::Input;
use crate::parser::token::TokenKind;

/// Parser error that accumulates `(input, kind)` entries while backtracking
/// through a parse tree. When the branches of an `alt` combinator fail, the
/// `or` implementation keeps the error whose first entry starts furthest
/// into the source, i.e. the deepest one.
#[derive(Clone, Debug, PartialEq)]
pub struct Error<'a> {
/// List of errors accumulated, each pairing the affected part of the input
/// with the kind of error (or context) recorded at that point.
pub errors: Vec<(Input<'a>, ErrorKind)>,
}

/// Kind of a single entry stored in the `errors` list of [`Error`].
#[derive(Clone, Debug, PartialEq)]
pub enum ErrorKind {
/// Static string added by the `context` function
Context(&'static str),
/// Error generated by `match_token` function
ExpectToken(TokenKind),
/// Error generated by `match_text` function
ExpectText(&'static str),
/// Error kind given by various nom parsers
Nom(nom::error::ErrorKind),
/// Error generated by std's number parser
ParseIntError(std::num::ParseIntError),
/// Plain text description of an error
Other(&'static str),
}

impl<'a> nom::error::ParseError<Input<'a>> for Error<'a> {
    /// Creates an error holding a single nom-generated entry located at `input`.
    fn from_error_kind(input: Input<'a>, kind: nom::error::ErrorKind) -> Self {
        Error {
            errors: vec![(input, ErrorKind::Nom(kind))],
        }
    }

    /// Pushes a nom-generated entry located at `input` onto the entries
    /// already collected in `other` and returns the extended error.
    fn append(input: Input<'a>, kind: nom::error::ErrorKind, mut other: Self) -> Self {
        other.errors.push((input, ErrorKind::Nom(kind)));
        other
    }

    /// Panics if called.
    // NOTE(review): presumably never invoked because the parsers work on
    // tokens rather than characters; confirm against the combinators in use.
    fn from_char(_: Input<'a>, _: char) -> Self {
        unreachable!()
    }

    /// Selects the deepest error while branching with the `alt` combinator.
    ///
    /// Depth is the start offset of the first token remaining at the first
    /// recorded entry. An entry whose remaining input is empty failed at the
    /// very end of the input, so it ranks deeper than any token position. An
    /// error with no entries at all ranks lowest. On a tie `self` is kept.
    fn or(self, other: Self) -> Self {
        let depth = |err: &Self| -> usize {
            err.errors.first().map_or(0, |(input, _)| {
                // No token left means the failure happened at end of input.
                input.get(0).map_or(usize::MAX, |token| token.span.start)
            })
        };

        if depth(&other) > depth(&self) {
            other
        } else {
            self
        }
    }
}

impl<'a> nom::error::ContextError<Input<'a>> for Error<'a> {
fn add_context(input: Input<'a>, ctx: &'static str, mut other: Self) -> Self {
other.errors.push((input, ErrorKind::Context(ctx)));
other
}
}

impl<'a> Error<'a> {
pub fn from_error_kind(input: Input<'a>, kind: ErrorKind) -> Self {
Error {
errors: vec![(input, kind)],
}
}
}

pub fn pretty_print_error<'a>(source: &'a str, error: nom::Err<Error<'a>>) -> String {
let mut writer = Buffer::no_color();
let file = SimpleFile::new("SQL", source);
let config = Config {
chars: Chars::ascii(),
before_label_lines: 3,
..Default::default()
};

let error = match error {
nom::Err::Error(error) | nom::Err::Failure(error) => error,
nom::Err::Incomplete(_) => unreachable!(),
};

let mut lables = Vec::new();
for (i, (input, kind)) in error.errors.iter().enumerate() {
let msg = match kind {
ErrorKind::Context(msg) => format!("while parsing {}", msg),
ErrorKind::ExpectToken(token) => format!("expected token <{:?}>", token),
ErrorKind::ExpectText(text) => format!("expected token {:?}", text),
ErrorKind::ParseIntError(err) => {
format!("unable to parse int because it {}", match err.kind() {
std::num::IntErrorKind::InvalidDigit =>
"contains invalid characters".to_string(),
std::num::IntErrorKind::PosOverflow => "positive overflowed".to_string(),
std::num::IntErrorKind::NegOverflow => "negative overflowed".to_string(),
err => format!("{:?}", err),
})
}
ErrorKind::Other(msg) => msg.to_string(),
ErrorKind::Nom(_) => continue,
};

let span = input[0].span.clone();

if i == 0 {
lables.push(Label::primary((), span).with_message(msg));
} else {
lables.push(Label::secondary((), span).with_message(msg));
}
}

let diagnostic = Diagnostic::error().with_labels(lables);

term::emit(&mut writer, &config, &file, &diagnostic).unwrap();

std::str::from_utf8(&writer.into_inner())
.unwrap()
.to_string()
}