Skip to content

Commit

Permalink
rs: Improve error message by collecting expected tokens
Browse files Browse the repository at this point in the history
When an unexpected input is found, the parser will backtrack and try
other options until it exhausts all the possibilities encoded in the
grammar.  With this change, the list of tokens attempted is now
collected and used to display a slightly more detailed message
on error.
  • Loading branch information
clarete committed Jun 20, 2024
1 parent e8abde0 commit 9100e46
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 28 deletions.
106 changes: 87 additions & 19 deletions langlang_lib/src/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// machine. This module has nothing to do with how patterns get
// compiled to programs, but how programs get executed as patterns.
//
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};

use crate::consts::WHITE_SPACE_RULE_NAME;

Expand Down Expand Up @@ -366,6 +366,12 @@ pub struct VM<'a> {
captures: Vec<CapStackFrame>,
// boolean flag that remembers if the VM is within a predicate
within_predicate: bool,
// expected_set keeps tabs on which tokens are expected but didn't
// match the current tokens under the cursor
expected_set: HashSet<String>,
// expected_vec contains the ordered list of tokens that are
// expected but didn't match the current token under the cursor
expected_vec: Vec<String>,
}

impl<'a> VM<'a> {
Expand All @@ -383,6 +389,8 @@ impl<'a> VM<'a> {
lrmemo: HashMap::new(),
captures: vec![],
within_predicate: false,
expected_set: HashSet::new(),
expected_vec: vec![],
}
}

Expand All @@ -393,10 +401,37 @@ impl<'a> VM<'a> {
let start = c.span().start;
self.line = start.line;
self.column = start.column;
Ok(())
}

// Builds the `Error::Matching` value reported when `expected` fails to
// match at the current cursor position.
//
// When the cursor is ahead of the farthest failure position (`ffp`),
// `ffp` is moved up to the cursor and the expected tokens gathered so
// far are discarded.  `expected` is then recorded at most once, and the
// returned message lists every recorded token, single-quoted, in the
// order in which it was first recorded.
fn ffp_err(&mut self, expected: Value) -> Error {
// update the farthest failure position if it is behind where
// the cursor currently is. If we match that condition, we'll
// also reset the set of expected tokens.
if self.cursor > self.ffp {
self.ffp = self.cursor;
self.expected_set = HashSet::new();
self.expected_vec = Vec::new();
}
// NOTE(review): the `Ok(())` below looks like a removed line left
// over from the diff rendering rather than part of this function
// (which returns `Error`) -- confirm against the actual file.
Ok(())

// add the new term to the set of expected tokens that haven't
// matched with the input; `expected_set` is only used to detect
// duplicates, while `expected_vec` keeps the quoted tokens in
// insertion order for the message
let e = expected.to_string();
if self.expected_set.get(&e).is_none() {
self.expected_vec.push(format!("'{}'", e));
self.expected_set.insert(e);
}

// fill up the error instance with the appropriate message
return Error::Matching(
self.ffp,
format!("syntax error, expecting: {}", self.expected_vec.join(", ")),
);
}

// Records `expected` as a token that failed to match (see `ffp_err`)
// and forwards the resulting error to `fail`, returning whatever
// `fail` returns.
fn ffp_fail(&mut self, expected: Value) -> Result<(), Error> {
    // The error is built in its own statement: both calls need
    // `&mut self`, so they cannot be nested in a single expression.
    let failure = self.ffp_err(expected);
    self.fail(failure)
}

// stack management
Expand Down Expand Up @@ -528,9 +563,13 @@ impl<'a> VM<'a> {
self.advance_cursor()?;
}
Instruction::Char(expected) => {
let start = self.pos();
self.program_counter += 1;
if self.cursor >= self.source.len() {
self.fail(Error::EOF)?;
self.ffp_fail(value::Char::new_val(
Span::new(start, self.pos()),
expected,
))?;
continue;
}
match &self.source[self.cursor] {
Expand All @@ -539,15 +578,22 @@ impl<'a> VM<'a> {
self.advance_cursor()?;
}
_ => {
self.fail(Error::Matching(self.ffp, expected.to_string()))?;
self.ffp_fail(value::Char::new_val(
Span::new(start, self.pos()),
expected,
))?;
continue;
}
}
}
Instruction::Span(start, end) => {
let start_pos = self.pos();
self.program_counter += 1;
if self.cursor >= self.source.len() {
self.fail(Error::EOF)?;
self.ffp_fail(value::String::new_val(
Span::new(start_pos.clone(), self.pos()),
format!("[{}-{}]", start, end),
))?;
continue;
}
match &self.source[self.cursor] {
Expand All @@ -558,26 +604,34 @@ impl<'a> VM<'a> {
self.advance_cursor()?;
}
_ => {
self.fail(Error::Matching(self.ffp, format!("[{}-{}]", start, end)))?;
self.ffp_fail(value::String::new_val(
Span::new(start_pos.clone(), self.pos()),
format!("[{}-{}]", start, end),
))?;
continue;
}
}
}
Instruction::String(id) => {
self.program_counter += 1;
let expected = self.program.string_at(id);
let start = self.pos();

if self.cursor >= self.source.len() {
self.fail(Error::EOF)?;
self.ffp_fail(value::String::new_val(
Span::new(start.clone(), self.pos()),
expected.clone(),
))?;
continue;
}
let expected = self.program.string_at(id);

match &self.source[self.cursor] {
Value::String(ref s) if &s.value == expected => {
self.capture(self.source[self.cursor].clone())?;
self.advance_cursor()?;
continue;
}
_ => {
let start = self.pos();
let mut expected_chars = expected.chars();
match loop {
let current_char = match expected_chars.next() {
Expand All @@ -592,7 +646,10 @@ impl<'a> VM<'a> {
self.advance_cursor()?;
}
_ => {
break Err(Error::Matching(self.ffp, expected.clone()));
break Err(self.ffp_err(value::String::new_val(
Span::new(start.clone(), self.pos()),
expected.clone(),
)));
}
};
} {
Expand Down Expand Up @@ -1067,7 +1124,10 @@ mod tests {
let result = vm.run_str("b");

assert!(result.is_err());
assert_eq!(Error::Matching(0, "a".to_string()), result.unwrap_err());
assert_eq!(
Error::Matching(0, "syntax error, expecting: 'a'".to_string()),
result.unwrap_err()
);
}

// (span.1)
Expand Down Expand Up @@ -1123,7 +1183,10 @@ mod tests {
let result = vm.run_str("9");

assert!(result.is_err());
assert_eq!(Error::Matching(0, "[a-z]".to_string()), result.unwrap_err());
assert_eq!(
Error::Matching(0, "syntax error, expecting: '[a-z]'".to_string()),
result.unwrap_err()
);
}

// (any.1)
Expand Down Expand Up @@ -1241,8 +1304,8 @@ mod tests {

assert!(result.is_err());
assert_eq!(Error::Fail, result.unwrap_err());
// assert!(vm.cursor.is_err());
assert_eq!(1, vm.ffp);
assert_eq!(1, vm.cursor);
assert_eq!(0, vm.ffp);
}

// (ord.1)
Expand Down Expand Up @@ -1274,7 +1337,10 @@ mod tests {

assert!(result.is_err());
// currently shows the last error
assert_eq!(Error::Matching(0, "b".to_string()), result.unwrap_err());
assert_eq!(
Error::Matching(0, "syntax error, expecting: 'a', 'b'".to_string()),
result.unwrap_err()
);
}

// (ord.2)
Expand Down Expand Up @@ -1337,7 +1403,6 @@ mod tests {

assert!(result.is_ok());
assert_eq!(1, vm.cursor);
assert_eq!(1, vm.ffp);
}

// (rep.1)
Expand Down Expand Up @@ -1436,7 +1501,7 @@ mod tests {

assert!(result.is_ok());
assert_eq!(3, vm.cursor);
assert_eq!(3, vm.ffp);
assert_eq!(2, vm.ffp);
}

// (var.2)
Expand Down Expand Up @@ -1474,7 +1539,10 @@ mod tests {
let result = vm.run_str("1+2");

assert!(result.is_err());
assert_eq!(Error::Matching(2, "1".to_string()), result.unwrap_err());
assert_eq!(
Error::Matching(2, "syntax error, expecting: '0', '1'".to_string()),
result.unwrap_err()
);
}

#[test]
Expand Down Expand Up @@ -1717,7 +1785,7 @@ mod tests {

assert!(result.is_err());
assert_eq!(
Error::Matching(5, "abacate".to_string()),
Error::Matching(5, "syntax error, expecting: 'abacate'".to_string()),
result.unwrap_err(),
);
}
Expand Down
4 changes: 2 additions & 2 deletions langlang_value/src/source_map.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#[derive(Clone, Debug, Default, PartialEq, PartialOrd)]
#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Eq, Hash)]
pub struct Position {
/// number of chars that have been seen since the beginning of the input
pub offset: usize,
Expand All @@ -24,7 +24,7 @@ impl ToString for Position {
}
}

#[derive(Clone, Debug, Default, PartialEq, PartialOrd)]
#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Eq, Hash)]
pub struct Span {
pub start: Position,
pub end: Position,
Expand Down
19 changes: 13 additions & 6 deletions langlang_value/src/value.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use std::string::String as StdString;

use crate::format;
use crate::source_map::Span;

#[derive(Clone, Debug, PartialEq, PartialOrd)]
#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash)]
pub enum Value {
Char(Char),
String(String),
Expand Down Expand Up @@ -31,7 +32,13 @@ impl Value {
}
}

#[derive(Clone, Debug, PartialEq, PartialOrd)]
impl ToString for Value {
fn to_string(&self) -> std::string::String {
format::compact(self)
}
}

#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash)]
pub struct Char {
pub span: Span,
pub value: char,
Expand All @@ -47,7 +54,7 @@ impl Char {
}
}

#[derive(Clone, Debug, PartialEq, PartialOrd)]
#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash)]
pub struct String {
pub span: Span,
pub value: StdString,
Expand All @@ -63,7 +70,7 @@ impl String {
}
}

#[derive(Clone, Debug, PartialEq, PartialOrd)]
#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash)]
pub struct List {
pub span: Span,
pub values: Vec<Value>,
Expand All @@ -79,7 +86,7 @@ impl List {
}
}

#[derive(Clone, Debug, PartialEq, PartialOrd)]
#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash)]
pub struct Node {
pub span: Span,
pub name: StdString,
Expand All @@ -96,7 +103,7 @@ impl Node {
}
}

#[derive(Clone, Debug, PartialEq, PartialOrd)]
#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Hash)]
pub struct Error {
pub span: Span,
pub label: StdString,
Expand Down
18 changes: 18 additions & 0 deletions tests/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,24 @@ fn test_node_0() {
);
}

// -- Error Reporting ------------------------------------------------------

// An input that matches none of the alternatives of an ordered choice
// must produce an error listing every alternative, in grammar order.
#[test]
fn test_reporting_0() {
    let grammar = "A <- 'abada' / 'abacate' / 'abadia' / 'aba'";
    let config = compiler::Config::default();
    let program = compile(&config, grammar, "A");

    let outcome = run_str(&program, "foo");
    assert!(outcome.is_err());

    let expected = vm::Error::Matching(
        0,
        "syntax error, expecting: 'abada', 'abacate', 'abadia', 'aba'".to_string(),
    );
    assert_eq!(outcome.unwrap_err(), expected);
}

// -- Error Recovery -------------------------------------------------------

#[test]
Expand Down
2 changes: 1 addition & 1 deletion tests/wshinsert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ fn is_syntactic_sequence_with_literals() {

// It doesn't expect spaces between the sequence items
helpers::assert_err(
vm::Error::Matching(1, "b".to_string()),
vm::Error::Matching(1, "syntax error, expecting: 'b'".to_string()),
run("Syntactic0", "a b c"),
);
}
Expand Down

0 comments on commit 9100e46

Please sign in to comment.