Allow returning errors from lexer
exellentcoin26 committed Sep 25, 2023
1 parent 041dcdf commit b12ba01
Showing 3 changed files with 78 additions and 25 deletions.
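With this commit the lexer's `Iterator` implementation yields `LexResult<Token<TokenKind>>` instead of bare tokens, so input the state machine cannot match surfaces as an `Err` item carrying its span rather than silently truncating the stream. A minimal sketch of the new calling convention, assuming the builder API exercised in the tests below (`with_token_unit`, `tokenize`) and that the crate is importable as `pango_lexer`:

```rust
use pango_lexer::Lexer; // assumed import path; not shown in this diff

// Mirrors the token-kind enums used in the lexer's tests.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum Tok {
    #[default]
    A,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let lexer = Lexer::builder()
        .with_token_unit("aa", Tok::A)?
        .tokenize("aaa");

    for item in lexer {
        match item {
            // A recognized token with its kind and source span.
            Ok(token) => println!("{token:?}"),
            // The (start, end) span of input no rule could match.
            Err(err) => eprintln!("lex error: {err:?}"),
        }
    }
    Ok(())
}
```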
8 changes: 6 additions & 2 deletions pango-lexer/src/lexer/input.rs
@@ -31,17 +31,21 @@ impl Default for InputIterToken {
 
 impl InputIterToken {
     /// Creates a new [`InputIterToken`].
-    pub(self) fn new(token_start: usize) -> Self {
+    fn new(token_start: usize) -> Self {
         Self {
             source: String::new(),
             pos: (token_start, token_start),
         }
     }
 
     /// Updates the end position of the token.
-    pub(self) fn update_pos_end(&mut self, new_end: usize) {
+    fn update_pos_end(&mut self, new_end: usize) {
         self.pos.1 = new_end;
     }
+
+    pub(super) fn is_empty(&self) -> bool {
+        self.source.is_empty()
+    }
 }
 
 impl Iterator for InputIter<'_> {
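For orientation, the struct these methods hang off of, reconstructed from the fields touched above; the actual definition sits earlier in `input.rs` and its visibility and doc comments may differ:

```rust
/// Token being built up as the input iterator advances
/// (shape inferred from the diff above; an assumption, not the committed code).
struct InputIterToken {
    /// Source text matched so far.
    source: String,
    /// (start, end) byte offsets of the token within the input.
    pos: (usize, usize),
}
```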
88 changes: 66 additions & 22 deletions pango-lexer/src/lexer/mod.rs
@@ -1,11 +1,15 @@
-use self::input::{InputIter, InputIterToken};
+use self::{
+    error::{LexError, LexResult},
+    input::{InputIter, InputIterToken},
+};
 use crate::{
     fsm::{NDSimulate, Nfa, NfaCompiler, Simulatable, Simulate, StateId},
     regex::{self, parser::error::ParseResult},
 };
 
-use std::collections::BTreeMap;
+use std::{collections::BTreeMap, iter::FusedIterator};
 
+mod error;
 mod input;
 
 /// Finite-state machine based lexer.

Check failure on line 2 (GitHub Actions / clippy): error[E0432]: unresolved imports `self::error::LexError`, `self::error::LexResult` (no `LexError` and no `LexResult` in `lexer::error`).

Check failure on line 12 (GitHub Actions / clippy): error[E0583]: file not found for module `error`. Help: to create the module `error`, create file "pango-lexer/src/lexer/error.rs" or "pango-lexer/src/lexer/error/mod.rs".
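Both clippy failures point at the same omission: the new `error` module was never committed. From its usages in this diff (`LexResult<Token<TokenKind>>` as the iterator item, `LexError(start, end)` construction, and `assert_eq!(tokens.last(), Some(Err(LexError(2, 3))))` in the tests), the missing file would plausibly be:

```rust
// pango-lexer/src/lexer/error.rs -- absent from this commit; a plausible
// reconstruction, with derives inferred from how the tests compare values.

/// Span of input the lexer could not match, as (start, end) byte offsets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LexError(pub usize, pub usize);

/// Result alias used as the lexer's `Iterator::Item`.
pub type LexResult<T> = Result<T, LexError>;
```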
@@ -63,7 +67,7 @@ where
     for<'a> Fsm: Simulatable + 'a,
     for<'a> Fsm::Simulator<'a>: NDSimulate,
 {
-    type Item = Token<TokenKind>;
+    type Item = LexResult<Token<TokenKind>>;
 
     fn next(&mut self) -> Option<Self::Item> {
         let mut sim = self.fsm.to_simulator();
@@ -73,8 +77,16 @@
             if !sim.can_feed(ch) {
                 // the current token is the longest token we can 'munch'
 
-                return token_kind.map(|token_kind| {
-                    Token::from_input_iter_token(self.iter.consume_token(), token_kind)
+                return Some(match token_kind {
+                    Some(token_kind) => Ok(Token::from_input_iter_token(
+                        self.iter.consume_token(),
+                        token_kind,
+                    )),
+                    None => {
+                        // tokenizer could not identify the token
+                        let token = self.iter.get_token();
+                        Err(LexError(token.pos.0, token.pos.1))
+                    }
                 });
             }
 
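This branch is the maximal-munch rule: characters are fed until the simulator can no longer advance, and only then is the longest accepted prefix emitted, or an error spanning the unmatched input if no rule ever accepted. Roughly, for the single rule `"aa" => Foo::A` on input `"aaa"` (the setup of the `token_error` test below):

```rust
// Approximate trace of repeated Lexer::next calls (an illustration,
// not committed code):
//
// next() #1: feed 'a' at 0 (alive, not final); feed 'a' at 1 (final:
//            remember Foo::A, span (0, 2)); 'a' at 2 cannot be fed,
//            so the longest munch is emitted as Ok(token Foo::A @ (0, 2)).
// next() #2: feed 'a' at 2 (alive, not final); input ends while not
//            accepting, and the leftover suffix "a" is non-empty, so the
//            call yields Err(LexError(2, 3)).
// next() #3: nothing left to feed and the token buffer is empty: None.
```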
@@ -85,7 +97,7 @@
 
             // Get the first final state the Simulator is in.
             //
-            // Note: this assumes that the lower final state ids, have the highest
+            // NOTE: this assumes that the lower final state ids, have the highest
             //       precedence. This is enforced by the order of expression
             //       compilation.
             let final_state = sim
@@ -110,16 +122,35 @@
             }
         }
 
-        if sim.is_accepting() {
+        if dbg!(sim.is_accepting()) {
             token_kind.map(|token_kind| {
-                Token::from_input_iter_token(self.iter.consume_token(), token_kind)
+                Ok(Token::from_input_iter_token(
+                    self.iter.consume_token(),
+                    token_kind,
+                ))
             })
         } else {
-            None
+            // check if characters are remaining
+            self.iter.accept_suffix();
+            let token = self.iter.get_token();
+
+            if token.is_empty() {
+                None
+            } else {
+                let token = self.iter.consume_token();
+                Some(Err(LexError(token.pos.0, token.pos.1)))
+            }
         }
     }
 }
 
+impl<TokenKind, Fsm> FusedIterator for Lexer<'_, TokenKind, Fsm>
+where
+    for<'a> Fsm: Simulatable + 'a,
+    for<'a> Fsm::Simulator<'a>: NDSimulate,
+{
+}
+
 impl<TokenKind> Lexer<'_, TokenKind, Nfa> {
     /// Creates a `LexerGenerator`.
     pub fn builder() -> LexerGenerator<TokenKind> {
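The new `FusedIterator` impl is purely a marker: it promises that once `next` returns `None`, every later call also returns `None`, which the empty-token check above guarantees. In miniature, using a std iterator to show the contract callers may now rely on:

```rust
fn main() {
    // The FusedIterator contract demonstrated on a std iterator.
    let mut it = [1, 2].iter().fuse();
    assert_eq!(it.next(), Some(&1));
    assert_eq!(it.next(), Some(&2));
    assert_eq!(it.next(), None);
    // A fused iterator must stay exhausted; the impl above makes the
    // same promise for Lexer, so wrapping it in .fuse() costs nothing.
    assert_eq!(it.next(), None);
}
```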
@@ -247,7 +278,7 @@ mod tests {
     macro_rules! assert_eq_tokens {
         ($lhs:expr, $rhs:expr) => {
             for (expected, actual) in $lhs.into_iter().zip($rhs) {
-                assert_eq!(expected, actual);
+                assert_eq!(Ok(expected), actual);
             }
         };
     }
@@ -266,22 +297,24 @@
         };
     }
 
+    use crate::lexer::error::LexError;
+
     use super::{Lexer, Token};
 
+    #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+    enum Foo {
+        #[default]
+        A,
+        B,
+        C,
+        D {
+            len: usize,
+        },
+        E,
+    }
+
     #[test]
     fn lexer() -> Result<(), Box<dyn std::error::Error>> {
-        #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
-        enum Foo {
-            #[default]
-            A,
-            B,
-            C,
-            D {
-                len: usize,
-            },
-            E,
-        }
-
         let tokens = Lexer::builder()
             .with_token_unit("aaaa", Foo::A)?
             .with_token_unit("b{4,}", Foo::B)?
@@ -308,4 +341,15 @@
 
         Ok(())
     }
+
+    #[test]
+    fn token_error() -> Result<(), Box<dyn std::error::Error>> {
+        let tokens = Lexer::builder()
+            .with_token_unit("aa", Foo::A)?
+            .tokenize("aaa");
+
+        assert_eq!(tokens.last(), Some(Err(LexError(2, 3))));
+
+        Ok(())
+    }
 }
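Since the item type is now a `Result`, a caller that wants all-or-nothing lexing can collect straight into `Result<Vec<_>, LexError>`; the first error short-circuits the collection. A sketch reusing `Lexer`, `Foo`, and `LexError` from the test module above:

```rust
fn collect_all() -> Result<(), Box<dyn std::error::Error>> {
    let tokens: Result<Vec<_>, LexError> = Lexer::builder()
        .with_token_unit("aa", Foo::A)?
        .tokenize("aaaa")
        // Result's FromIterator implementation stops at the first Err item.
        .collect();

    assert!(tokens.is_ok()); // "aaaa" splits cleanly into two Foo::A tokens
    Ok(())
}
```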
7 changes: 6 additions & 1 deletion pango-parser/src/parser/slr.rs
@@ -55,7 +55,12 @@
             state: current_state,
         }]);
 
-        while let Some(token) = next_token.or_else(|| input.next()) {
+        while let Some(token) = next_token.map(Ok).or_else(|| input.next()) {
+            let token = match token {
+                Ok(token) => token,
+                Err(_) => return Err(()),
+            };
+
             next_token = Some(token);
             let (parse_node, accept) =
                 self.handle_token(&mut next_token, &mut stack, current_state)?;
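The reworked loop head threads the parser's stored lookahead (`next_token`, already validated, hence `map(Ok)`) together with the now-fallible lexer items, and aborts the parse on the first lexing error. The same shape in isolation; the names and the unit error type are illustrative:

```rust
// Standalone sketch of the new loop head (illustrative, not library code).
fn drive(mut input: impl Iterator<Item = Result<u32, ()>>) -> Result<(), ()> {
    // Lookahead the parser peeked at but has not consumed yet.
    let mut next_token: Option<u32> = None;

    // The stored lookahead is already known-good, so wrap it in Ok to align
    // with the lexer's Result items; otherwise pull the next item from input.
    while let Some(token) = next_token.take().map(Ok).or_else(|| input.next()) {
        // A lexing error immediately fails the parse.
        let token = token?;
        // ... shift/reduce work here; a handler may stash a token back
        // into `next_token` as fresh lookahead ...
        let _ = token;
    }
    Ok(())
}
```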