diff --git a/Cargo.lock b/Cargo.lock index 0c5d23a..7768a14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12,56 +12,238 @@ dependencies = [ ] [[package]] -name = "algorithm" -version = "0.1.1" +name = "anyhow" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" + +[[package]] +name = "copager" +version = "0.2.0" dependencies = [ - "algorithm_lr1", + "anyhow", + "copager", + "copager_cfg", + "copager_core", + "copager_core_macros", + "copager_ir", + "copager_ir_sexp", + "copager_ir_void", + "copager_lex", + "copager_lex_regex", + "copager_parse", + "copager_parse_lr1", + "serde", + "serde_json", ] [[package]] -name = "algorithm_lr1" -version = "0.1.1" +name = "copager_cfg" +version = "0.2.0" dependencies = [ "anyhow", - "core", - "itertools", + "thiserror", +] + +[[package]] +name = "copager_core" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_core", + "copager_ir", + "copager_ir_void", + "copager_lex", + "copager_lex_regex", + "copager_parse", + "copager_parse_lr1", + "copager_utils", "serde", + "serde_cbor", "thiserror", ] [[package]] -name = "anyhow" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" +name = "copager_core_macros" +version = "0.2.0" +dependencies = [ + "anyhow", + "proc-macro2", + "quote", + "syn", + "thiserror", +] [[package]] -name = "core" -version = "0.1.1" +name = "copager_ir" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "copager_parse", + "thiserror", +] + +[[package]] +name = "copager_ir_sexp" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_ir", + "copager_ir_sexp", + "copager_lex", + "copager_lex_regex", + "copager_parse", + "copager_parse_lr1", + "thiserror", +] + +[[package]] +name = "copager_ir_void" +version = "0.2.0" dependencies = [ "anyhow", - "core_derive", + "copager_cfg", + "copager_ir", + "copager_lex", + "copager_parse", + "thiserror", +] + +[[package]] +name = "copager_lex" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex_derive", + "copager_utils", + "thiserror", +] + +[[package]] +name = "copager_lex_derive" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "proc-macro2", + "quote", + "syn", + "thiserror", +] + +[[package]] +name = "copager_lex_regex" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "copager_lex_regex", + "copager_utils", "regex", - "serde", + "regex-macro", "thiserror", ] [[package]] -name = "core_derive" -version = "0.1.1" +name = "copager_parse" +version = "0.2.0" dependencies = [ "anyhow", + "copager_cfg", + "copager_lex", + "copager_parse_derive", + "copager_utils", + "thiserror", +] + +[[package]] +name = "copager_parse_derive" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "copager_parse", "proc-macro2", "quote", "syn", "thiserror", ] +[[package]] +name = "copager_parse_lr1" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_core", + "copager_lex", + "copager_lex_regex", + "copager_parse", + "copager_parse_lr1", + "copager_utils", + "itertools", + "serde", + "thiserror", +] + +[[package]] +name = "copager_utils" +version = "0.1.1" +dependencies = [ + "anyhow", + "serde", + "thiserror", +] + [[package]] name = "either" -version = "1.11.0" 
+version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "example_oneshot" +version = "0.1.0" +dependencies = [ + "anyhow", + "copager", + "thiserror", +] + +[[package]] +name = "example_prebuild" +version = "0.1.0" +dependencies = [ + "anyhow", + "copager", + "example_prebuild_grammar", + "serde", + "thiserror", +] + +[[package]] +name = "example_prebuild_grammar" +version = "0.1.0" +dependencies = [ + "anyhow", + "copager", + "serde", + "thiserror", +] + +[[package]] +name = "half" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" [[package]] name = "itertools" @@ -85,16 +267,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] -name = "parsergen" -version = "0.1.1" -dependencies = [ - "algorithm", - "anyhow", - "core", - "serde", - "serde_json", - "thiserror", -] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "proc-macro2" @@ -137,6 +313,16 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-macro" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12fa36e7add16db296640bba993a65dae2a0088a8e5cd5f935c8bfbd3710145b" +dependencies = [ + "once_cell", + "regex", +] + [[package]] name = "regex-syntax" version = "0.8.3" @@ -158,6 +344,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.202" @@ -171,11 +367,12 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] diff --git a/Cargo.toml b/Cargo.toml index aac0baa..61469d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,35 +1,71 @@ +cargo-features = ["edition2024"] + [package] -name = "parsergen" -version = "0.1.1" -edition = "2021" +name = "copager" +version = "0.2.0" +edition = "2024" [dependencies] -anyhow = { workspace = true } -thiserror = { workspace = true } -pgen_core = { workspace = true } -pgen_algorithm = { workspace = true } +serde_json = { version = "1.0.117", optional = true } +copager_core = { path = "./crates/core" } +copager_core_macros = { path = "./crates/core_macros" } +copager_cfg = { path = "./crates/cfg" } +copager_lex = { path = "./crates/lex", optional = true } +copager_lex_regex = { path = "./crates/lex_regex", optional = true } +copager_parse = { path = "./crates/parse", optional = true } +copager_parse_lr1 = { path = "./crates/parse_lr1", optional = true } +copager_ir = { path = "./crates/ir" } +copager_ir_void = { path = "./crates/ir_void", optional = true } +copager_ir_sexp = { path = 
"./crates/ir_sexp", optional = true } [dev-dependencies] +anyhow = { workspace = true } serde = { workspace = true } serde_json = "1.0.117" +copager = { path = ".", features = ["all"] } [features] -default = [] -derive = ["pgen_core/derive"] +# common +default = ["dep:copager_lex", "dep:copager_parse"] +all = ["prebuild", "derive", "regexlex", "lr1", "void", "sexp"] +prebuild = ["dep:serde_json"] +derive = ["copager_lex/derive", "copager_parse/derive"] + +# lex +regexlex = ["dep:copager_lex_regex"] + +# parse +lr1 = ["dep:copager_parse_lr1"] + +# ir +void = ["dep:copager_ir_void"] +sexp = ["dep:copager_ir_sexp"] [workspace] resolver = "2" members = [ + # Copager "./crates/core", - "./crates/algorithm", + "./crates/core_macros", + "./crates/cfg", + "./crates/lex", + "./crates/lex_derive", + "./crates/lex_regex", + "./crates/parse", + "./crates/parse_derive", + "./crates/parse_lr1", + "./crates/ir", + "./crates/ir_void", + "./crates/ir_sexp", + "./crates/utils", + + # Examples + "./examples/oneshot", + "./examples/prebuild", ] exclude = [] [workspace.dependencies] anyhow = "1.0.82" thiserror = "1.0.58" -serde = "1.0.197" -regex = "1.10.4" -regex-macro = "0.2.0" -pgen_core = { package = "core", path = "./crates/core" } -pgen_algorithm = { package = "algorithm", path = "./crates/algorithm" } +serde = { version = "1.0.197", features = ["derive"] } diff --git a/README.md b/README.md index 721eec6..0c40792 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,44 @@ -# Parsergen +# Copager Rust製パーサジェネレータ ## Features +### Common + +- `all` - `derive` +- `prebuild` + +### Lex + +- `regexlex` : [crates/lex_regex](crates/lex_regex) + +### Parse + +- `lr1` : [crates/parse_lr1](crates/parse_lr1) + +### IR + +- `void` : [crates/ir_void](crates/ir_void) +- `sexp` : [crates/ir_sexp](crates/ir_sexp) ## Examples -[examples/expr.rs](examples/expr.rs) +### One-shot + +[examples/oneshot](examples/oneshot) + +``` +$ echo "10 * (20 + 30)" | cargo run -p example_oneshot +Success : (Expr (Term (Term (Num "10")) "*" (Num "(" (Expr (Expr (Term (Num "20"))) "+" (Term (Num "30"))) ")"))) +``` + +### Pre-build + +[examples/prebuild](examples/prebuild) ``` -$ cargo run --example expr -(10+20)/((30*40)-50) -Accepted : (Expr (Term (Term (Num "(" (Expr (Expr (Term (Num "10"))) "+" (Term (Num "20"))) ")")) "/" (Num "(" (Expr (Expr (Term (Num "(" (Expr (Term (Term (Num "30")) "*" (Num "40"))) ")"))) "-" (Term (Num "50"))) ")"))) - -$ cargo run --example expr -10** ------ - 1: 10** - ^ here -Error at line 1, column 4. 
------ - -Rejected : Unexpected token "Mul" found +$ echo "10 * (20 + 30)" | cargo run -p example_prebuild +Success : (Expr (Term (Term (Num "10")) "*" (Num "(" (Expr (Expr (Term (Num "20"))) "+" (Term (Num "30"))) ")"))) ``` diff --git a/crates/algorithm/Cargo.toml b/crates/algorithm/Cargo.toml deleted file mode 100644 index 0db6c8a..0000000 --- a/crates/algorithm/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "algorithm" -version = "0.1.1" -edition = "2021" - -[dependencies] -lr1 = { package = "algorithm_lr1", path = "../algorithm_lr1" } diff --git a/crates/algorithm/src/lib.rs b/crates/algorithm/src/lib.rs deleted file mode 100644 index 80e1952..0000000 --- a/crates/algorithm/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ -pub use lr1::LR1; diff --git a/crates/algorithm_lr1/Cargo.toml b/crates/algorithm_lr1/Cargo.toml deleted file mode 100644 index 0380016..0000000 --- a/crates/algorithm_lr1/Cargo.toml +++ /dev/null @@ -1,11 +0,0 @@ -[package] -name = "algorithm_lr1" -version = "0.1.1" -edition = "2021" - -[dependencies] -anyhow = { workspace = true } -thiserror = { workspace = true } -serde = { workspace = true, features = ["derive"] } -itertools = "0.12.1" -pgen_core = { package = "core", path = "../core", features = ["derive"] } diff --git a/crates/algorithm_lr1/src/driver.rs b/crates/algorithm_lr1/src/driver.rs deleted file mode 100644 index c484233..0000000 --- a/crates/algorithm_lr1/src/driver.rs +++ /dev/null @@ -1,67 +0,0 @@ -use pgen_core::cfg::{TokenSet, Syntax}; -use pgen_core::lex::Token; -use pgen_core::parse::{SExp, SExpBuilder}; - -use crate::error::ParseError; -use crate::builder::{LRAction, LR1Configure}; - -pub(super) struct LR1Driver<'a, 'b, T, S> (&'b LR1Configure<'a, T, S>) -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>; - -impl<'a, 'b, T, S> LR1Driver<'a, 'b, T, S> -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, -{ - pub fn new(configure: &'b LR1Configure<'a, T, S>) -> LR1Driver<'a, 'b, T, S> { - LR1Driver(configure) - } - - pub fn run<'c>( - &self, - lexer: &mut impl Iterator>, - ) -> anyhow::Result> { - let mut stack = vec![0]; - let mut builder = SExpBuilder::new(); - loop { - let input = lexer.next(); - loop { - let top = stack[stack.len() - 1]; - let action = match input { - Some(token) => ( - self.0.action_table[top].get(&token.kind).unwrap(), - Some(token), - ), - None => ( - &self.0.eof_action_table[top], - None - ), - }; - match action { - (LRAction::Shift(new_state), Some(token)) => { - stack.push(*new_state); - builder.push(token); - break; - } - (LRAction::Reduce(tag, goto, elems_cnt), _) => { - stack.truncate(stack.len() - elems_cnt); - stack.push(self.0.goto_table[stack[stack.len() - 1]][*goto]); - builder.wrap(*tag, *elems_cnt); - } - (LRAction::Accept, _) => { - return builder.build(); - } - (LRAction::None, Some(token)) => { - return Err(ParseError::new_unexpected_token(token).into()); - } - (LRAction::None, None) => { - return Err(ParseError::UnexpectedEOF.into()); - } - _ => unreachable!(), - } - } - } - } -} diff --git a/crates/algorithm_lr1/src/lib.rs b/crates/algorithm_lr1/src/lib.rs deleted file mode 100644 index d8ff6c1..0000000 --- a/crates/algorithm_lr1/src/lib.rs +++ /dev/null @@ -1,120 +0,0 @@ -mod error; -mod builder; -mod driver; - -use serde::{Serialize, Deserialize}; - -use pgen_core::cfg::{TokenSet, Syntax}; -use pgen_core::lex::Token; -use pgen_core::parse::{ParserImpl, SExp}; - -use builder::LR1Configure; -use driver::LR1Driver; - -#[derive(Debug, Serialize, Deserialize)] -pub struct LR1<'a, T, S> 
(LR1Configure<'a, T, S>) -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>; - -impl<'a, T, S> ParserImpl<'a> for LR1<'a, T, S> -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, -{ - type TokenSet = T; - type Syntax = S; - - fn setup() -> anyhow::Result { - Ok(LR1(LR1Configure::setup()?)) - } - - fn parse<'b>( - &self, - mut lexer: impl Iterator>, - ) -> anyhow::Result> { - LR1Driver::new(&self.0).run(&mut lexer) - } -} - -#[cfg(test)] -mod test { - use pgen_core::cfg::{TokenSet, Syntax, Rule, RuleElem}; - use pgen_core::Parser; - - use super::LR1; - - #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] - enum TestTokenSet { - #[token(regex = r"\+")] - Plus, - #[token(regex = r"-")] - Minus, - #[token(regex = r"\*")] - Mul, - #[token(regex = r"/")] - Div, - #[token(regex = r"\(")] - BracketL, - #[token(regex = r"\)")] - BracketR, - #[token(regex = r"[1-9][0-9]*")] - Num, - #[token(regex = r"[ \t\n]+", ignored)] - _Whitespace, - } - - #[derive(Debug, Clone, Copy, Syntax)] - enum TestSyntax { - #[rule(" ::= Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, - } - - #[test] - fn input_ok() { - let inputs = vec![ - "10", - "10 + 20", - "10 - 20", - "10 * 20", - "10 / 20", - "10 + 20 * 30 - 40", - "(10)", - "((((10))))", - "10 * (20 - 30)", - "((10 + 20) * (30 / 40)) - 50", - ]; - - let parser = Parser::>::new().unwrap(); - for input in inputs { - assert!(parser.parse(input).is_ok(), "{}", input); - } - } - - #[test] - fn input_err() { - let inputs = vec![ - "()", - "(10 -", - "10 +", - "*", - "10 20 + 30", - "10 + 20 * 30 / 40 (", - "(((10))", - ]; - - let parser = Parser::>::new().unwrap(); - for input in inputs { - assert!(parser.parse(input).is_err(), "{}", input); - } - } -} diff --git a/crates/cfg/Cargo.toml b/crates/cfg/Cargo.toml new file mode 100644 index 0000000..220fec0 --- /dev/null +++ b/crates/cfg/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "copager_cfg" +version = "0.2.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } diff --git a/crates/cfg/src/lib.rs b/crates/cfg/src/lib.rs new file mode 100644 index 0000000..9ddfc51 --- /dev/null +++ b/crates/cfg/src/lib.rs @@ -0,0 +1,2 @@ +pub mod rule; +pub mod token; diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs new file mode 100644 index 0000000..2f1bd24 --- /dev/null +++ b/crates/cfg/src/rule.rs @@ -0,0 +1,400 @@ +use std::collections::HashMap; +use std::fmt::Debug; +use std::hash::Hash; + +use crate::token::TokenTag; + +pub trait RuleTag +where + Self: Debug + Copy + Clone + Hash + Eq, +{ + fn as_rules(&self) -> Vec>; +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Rule { + pub id: usize, + pub lhs: RuleElem, + pub rhs: Vec>, +} + +impl From<(RuleElem, Vec>)> for Rule { + fn from((lhs, rhs): (RuleElem, Vec>)) -> Self { + Rule { id: 0, lhs, rhs } + } +} + +impl Rule { + pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem> { + let mut l_nonterms = vec![&self.lhs]; + let r_nonterms: Vec<&RuleElem> = self + .rhs + .iter() + .filter(|token| matches!(token, RuleElem::::NonTerm(_))) + .collect(); + l_nonterms.extend(r_nonterms); + l_nonterms + } + + pub fn terms<'a>(&'a self) -> Vec<&'a RuleElem> { + self.rhs + .iter() + .filter(|token| matches!(token, RuleElem::::Term(_))) + .collect() + } +} + +#[derive(Debug, Clone, Eq)] +pub enum RuleElem { + NonTerm(String), + Term(T), + EOF, +} + 
+impl Hash for RuleElem { + fn hash(&self, state: &mut H) { + match self { + RuleElem::NonTerm(s) => s.hash(state), + RuleElem::Term(t) => t.hash(state), + RuleElem::EOF => 0.hash(state), + } + } +} + +impl PartialEq for RuleElem { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (RuleElem::NonTerm(s1), RuleElem::NonTerm(s2)) => s1 == s2, + (RuleElem::Term(t1), RuleElem::Term(t2)) => t1 == t2, + (RuleElem::EOF, RuleElem::EOF) => true, + _ => false, + } + } +} + +impl RuleElem { + pub fn new_nonterm>(t: U) -> RuleElem { + RuleElem::NonTerm(t.into()) + } + + pub fn new_term(t: T) -> RuleElem { + RuleElem::Term(t) + } +} + +#[derive(Debug, Clone)] +pub struct RuleSet { + pub top: String, + pub rules: Vec>, +} + +impl FromIterator> for RuleSet { + fn from_iter(rules: I) -> Self + where + I: IntoIterator>, + { + let rules = rules.into_iter().collect::>(); + let top = match &rules[0].lhs { + RuleElem::NonTerm(s) => s.clone(), + _ => unreachable!(), + }; + RuleSet { top, rules } + } +} + +impl RuleSet { + pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem> { + self.rules.iter().flat_map(|rule| rule.nonterms()).collect() + } + + pub fn terms<'a>(&'a self) -> Vec<&'a RuleElem> { + self.rules.iter().flat_map(|rule| rule.terms()).collect() + } + + pub fn find_rule<'a>(&'a self, target: &RuleElem) -> Vec<&'a Rule> { + self.rules + .iter() + .filter(|rule| &rule.lhs == target) + .collect() + } + + pub fn first_set<'a>(&'a self) -> HashMap<&'a RuleElem, Vec<&'a RuleElem>> { + // 1. Calc a null set + let nulls_set = self.nulls_set(); + + // 2. Initialize a first set + let mut first_set: HashMap<&RuleElem, Vec<&RuleElem>> = HashMap::new(); + first_set.insert(&RuleElem::EOF, vec![&RuleElem::EOF]); + self.terms().into_iter().for_each(|relem| { + first_set.insert(relem, vec![relem]); + }); + self.nonterms().into_iter().for_each(|relem| { + first_set.insert(relem, vec![]); + }); + + // 3. List up candidates from a nonterm set + let mut candidates = vec![]; + for nonterm in self.nonterms() { + let rules = self.find_rule(nonterm); + for rule in rules { + for relem in &rule.rhs { + if &rule.lhs != relem { + candidates.push((nonterm, relem)) + } + if !nulls_set.contains(&relem) { + break; + } + } + } + } + + // 4. Find first set with recursive + let mut updated = true; + while updated { + updated = false; + for (nonterm, candidate) in &candidates { + let found_elems: Vec<&RuleElem> = first_set + .get(candidate) + .unwrap() + .iter() + .filter(|relem| !first_set.get(nonterm).unwrap().contains(relem)) + .copied() + .collect(); + updated = !found_elems.is_empty(); + first_set + .get_mut(nonterm) + .unwrap() + .extend(found_elems.into_iter()); + } + } + + first_set + } + + fn nulls_set<'a>(&'a self) -> Vec<&'a RuleElem> { + // 1. Find null rules + let mut nulls_set: Vec<&RuleElem> = self + .rules + .iter() + .filter(|rule| rule.rhs.is_empty()) + .map(|rule| &rule.lhs) + .collect(); + + // 2. 
Find null rules with recursive + let mut updated = true; + while updated { + updated = false; + for rule in &self.rules { + if nulls_set.contains(&&rule.lhs) { + continue; + } else if rule.rhs.iter().all(|relem| nulls_set.contains(&relem)) { + nulls_set.push(&rule.lhs); + updated = true; + } else { + continue; + } + } + } + + nulls_set + } +} + +// #[cfg(test)] +// mod test { +// use std::collections::HashMap; + +// use crate::token::TokenTag; +// use crate::RuleKind; + +// use super::{Rule, RuleElem}; + +// #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +// enum TestToken { +// Num, +// Plus, +// Minus, +// Mul, +// Div, +// BracketA, +// BracketB, +// } + +// impl TokenKind<'_> for TestToken { +// fn as_str(&self) -> &'static str { +// match self { +// TestToken::Num => r"^[1-9][0-9]*", +// TestToken::Plus => r"^\+", +// TestToken::Minus => r"^-", +// TestToken::Mul => r"^\*", +// TestToken::Div => r"^/", +// TestToken::BracketA => r"^\(", +// TestToken::BracketB => r"^\)", +// } +// } + +// fn ignore_str() -> &'static str { +// r"^[ \t\n]+" +// } + +// fn into_iter() -> impl Iterator { +// vec![ +// TestToken::Num, +// TestToken::Plus, +// TestToken::Minus, +// TestToken::Mul, +// TestToken::Div, +// TestToken::BracketA, +// TestToken::BracketB, +// ] +// .into_iter() +// } +// } + +// #[derive(Debug, Clone, Hash, PartialEq, Eq)] +// enum TestRule { +// ExprPlus, +// ExprMinus, +// Expr2Term, +// TermMul, +// TermDiv, +// Term2Fact, +// Fact2Expr, +// Fact2Num, +// } + +// impl<'a> RuleKind<'a> for TestRule { +// type TokenKind = TestToken; + +// fn into_iter() -> impl Iterator { +// Box::new( +// vec![ +// TestRule::ExprPlus, +// TestRule::ExprMinus, +// TestRule::Expr2Term, +// TestRule::TermMul, +// TestRule::TermDiv, +// TestRule::Term2Fact, +// TestRule::Fact2Expr, +// TestRule::Fact2Num, +// ] +// .into_iter(), +// ) +// } + +// fn into_rules(&self) -> Vec> { +// let expr_plus = Rule::from(( +// RuleElem::new_nonterm("expr"), +// vec![ +// RuleElem::new_nonterm("expr"), +// RuleElem::new_term(TestToken::Plus), +// RuleElem::new_nonterm("term"), +// ], +// )); + +// let expr_minus = Rule::from(( +// RuleElem::new_nonterm("expr"), +// vec![ +// RuleElem::new_nonterm("expr"), +// RuleElem::new_term(TestToken::Minus), +// RuleElem::new_nonterm("term"), +// ], +// )); + +// let expr_2_term = Rule::::from(( +// RuleElem::new_nonterm("expr"), +// vec![RuleElem::new_nonterm("term")], +// )); + +// let term_mul = Rule::from(( +// RuleElem::new_nonterm("term"), +// vec![ +// RuleElem::new_nonterm("term"), +// RuleElem::new_term(TestToken::Mul), +// RuleElem::new_nonterm("fact"), +// ], +// )); + +// let term_div = Rule::from(( +// RuleElem::new_nonterm("term"), +// vec![ +// RuleElem::new_nonterm("term"), +// RuleElem::new_term(TestToken::Div), +// RuleElem::new_nonterm("fact"), +// ], +// )); + +// let term_2_fact = Rule::::from(( +// RuleElem::new_nonterm("term"), +// vec![RuleElem::new_nonterm("fact")], +// )); + +// let fact_2_expr = Rule::from(( +// RuleElem::new_nonterm("fact"), +// vec![ +// RuleElem::new_term(TestToken::BracketA), +// RuleElem::new_nonterm("expr"), +// RuleElem::new_term(TestToken::BracketB), +// ], +// )); + +// let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![])); + +// match self { +// TestRule::ExprPlus => vec![expr_plus], +// TestRule::ExprMinus => vec![expr_minus], +// TestRule::Expr2Term => vec![expr_2_term], +// TestRule::TermMul => vec![term_mul], +// TestRule::TermDiv => vec![term_div], +// TestRule::Term2Fact => vec![term_2_fact], +// 
TestRule::Fact2Expr => vec![fact_2_expr], +// TestRule::Fact2Num => vec![fact_2_num], +// } +// } +// } + +// fn check>( +// first_set: &HashMap<&RuleElem, Vec<&RuleElem>>, +// nonterm: T, +// exp_terms: Vec, +// ) { +// let nonterms = RuleElem::::new_nonterm(nonterm); +// let exp_terms: Vec> = exp_terms +// .into_iter() +// .map(|term| RuleElem::new_term(term)) +// .collect(); +// assert!(first_set.get(&nonterms).unwrap().len() == exp_terms.len()); + +// let result = first_set +// .get(&nonterms) +// .unwrap() +// .into_iter() +// .zip(exp_terms.into_iter()) +// .any(|(a, b)| a == &&b); +// assert!(result); +// } + +// #[test] +// fn first_set() { +// let ruleset = ::into_ruleset(); +// let first_set = ruleset.first_set(); + +// check( +// &first_set, +// "expr", +// vec![ +// TestToken::Plus, +// TestToken::Minus, +// TestToken::Mul, +// TestToken::Div, +// TestToken::BracketA, +// ], +// ); +// check( +// &first_set, +// "term", +// vec![TestToken::Mul, TestToken::Div, TestToken::BracketA], +// ); +// check(&first_set, "fact", vec![TestToken::BracketA]); +// } +// } diff --git a/crates/cfg/src/token.rs b/crates/cfg/src/token.rs new file mode 100644 index 0000000..1469f80 --- /dev/null +++ b/crates/cfg/src/token.rs @@ -0,0 +1,27 @@ +use std::fmt::Debug; +use std::hash::Hash; + +pub trait TokenTag +where + Self: Debug + Copy + Clone + Hash + Eq, +{ + fn as_str<'a, 'b>(&'a self) -> &'b str; +} + +#[derive(Debug, Copy, Clone)] +pub struct Token<'input, T: TokenTag> { + pub kind: T, + pub src: &'input str, + pub range: (usize, usize), +} + +impl<'input, T: TokenTag> Token<'input, T> { + pub fn new(kind: T, src: &'input str, range: (usize, usize)) -> Self { + Token { kind, src, range } + } + + pub fn as_str(&self) -> &'input str { + let (l, r) = self.range; + &self.src[l..r] + } +} diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index ce5d477..6d8bf34 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -1,15 +1,23 @@ [package] -name = "core" -version = "0.1.1" +name = "copager_core" +version = "0.2.0" edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } -serde = { workspace = true, features = ["derive"]} -regex = { workspace = true } -pgen_core_derive = { package = "core_derive", path = "../core_derive", optional = true } +serde = { workspace = true } +serde_cbor = "0.11.2" +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } +copager_ir = { path = "../ir" } +copager_utils = { path = "../utils" } -[features] -default = [] -derive = ["dep:pgen_core_derive"] +[dev-dependencies] +copager_core = { path = "." 
} +copager_lex = { path = "../lex", features = ["derive"]} +copager_lex_regex = { path = "../lex_regex" } +copager_parse = { path = "../parse", features = ["derive"] } +copager_parse_lr1 = { path = "../parse_lr1" } +copager_ir_void = { path = "../ir_void" } diff --git a/crates/core/src/cfg.rs b/crates/core/src/cfg.rs deleted file mode 100644 index c967c91..0000000 --- a/crates/core/src/cfg.rs +++ /dev/null @@ -1,8 +0,0 @@ -mod token; -mod syntax; - -#[cfg(feature = "derive")] -pub use pgen_core_derive::{TokenSet, Syntax}; - -pub use token::TokenSet; -pub use syntax::{Syntax, Rule, RuleElem, RuleSet}; diff --git a/crates/core/src/cfg/syntax.rs b/crates/core/src/cfg/syntax.rs deleted file mode 100644 index a41d085..0000000 --- a/crates/core/src/cfg/syntax.rs +++ /dev/null @@ -1,428 +0,0 @@ -use std::collections::HashMap; -use std::fmt::Debug; -use std::hash::Hash; -use std::marker::PhantomData; - -use super::token::TokenSet; - -pub trait Syntax<'a> -where - Self: Debug + Clone + Copy, -{ - type TokenSet: TokenSet<'a>; - - fn into_iter() -> impl Iterator; - fn into_rules(&self) -> Vec>; - - fn into_ruleset() -> RuleSet<'a, Self::TokenSet> { - let rules = Self::into_iter() - .enumerate() - .flat_map(|(idx, elem)| { - let mut rules = Self::into_rules(&elem); - for rule in &mut rules { - rule.id = idx; - } - rules - }) - .collect::>(); - - RuleSet::from(rules) - } -} - -#[derive(PartialEq, Eq, Hash, Debug)] -pub struct Rule<'a, T: TokenSet<'a>> { - pub id: usize, - pub lhs: RuleElem<'a, T>, - pub rhs: Vec>, - tokenset: PhantomData<&'a T>, -} - -impl<'a, T: TokenSet<'a>> From<(RuleElem<'a, T>, Vec>)> for Rule<'a, T> { - fn from((lhs, rhs): (RuleElem<'a, T>, Vec>)) -> Self { - Rule { - id: 0, - lhs, - rhs, - tokenset: PhantomData, - } - } -} - -impl<'a, T: TokenSet<'a>> Rule<'a, T> { - pub fn nonterms<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { - let mut l_nonterms = vec![&self.lhs]; - let r_nonterms: Vec<&RuleElem> = self - .rhs - .iter() - .filter(|token| matches!(token, RuleElem::::NonTerm(_))) - .collect(); - l_nonterms.extend(r_nonterms); - l_nonterms - } - - pub fn terms<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { - self.rhs - .iter() - .filter(|token| matches!(token, RuleElem::::Term(_))) - .collect() - } -} - -#[derive(Debug)] -pub enum RuleElem<'a, T: TokenSet<'a>> { - NonTerm(String), - Term((T, PhantomData<&'a T>)), - EOF, -} - -impl<'a, T: TokenSet<'a>> Hash for RuleElem<'a, T> { - fn hash(&self, state: &mut H) { - match self { - RuleElem::NonTerm(s) => s.hash(state), - RuleElem::Term(t) => t.hash(state), - RuleElem::EOF => 0.hash(state), - } - } -} - -impl<'a, T: TokenSet<'a>> PartialEq for RuleElem<'a, T> { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (RuleElem::NonTerm(s1), RuleElem::NonTerm(s2)) => s1 == s2, - (RuleElem::Term(t1), RuleElem::Term(t2)) => t1 == t2, - (RuleElem::EOF, RuleElem::EOF) => true, - _ => false, - } - } -} - -impl<'a, T: TokenSet<'a>> Eq for RuleElem<'a, T> {} - -impl<'a, T: TokenSet<'a>> RuleElem<'a, T> { - pub fn new_nonterm>(t: U) -> RuleElem<'a, T> { - RuleElem::NonTerm(t.into()) - } - - pub fn new_term(t: T) -> RuleElem<'a, T> { - RuleElem::Term((t, PhantomData)) - } -} - -#[derive(Debug)] -pub struct RuleSet<'a, T: TokenSet<'a>> { - pub top: String, - pub rules: Vec>, - tokenset: PhantomData<&'a T>, -} - -impl<'a, T: TokenSet<'a>> From>> for RuleSet<'a, T> { - fn from(rules: Vec>) -> Self { - let top = match &rules[0].lhs { - RuleElem::NonTerm(s) => s.clone(), - _ => unreachable!(), - }; - - RuleSet { - top, - rules, - tokenset: 
PhantomData, - } - } -} - -impl<'a, T: TokenSet<'a>> RuleSet<'a, T> { - pub fn nonterms<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { - self.rules.iter().flat_map(|rule| rule.nonterms()).collect() - } - - pub fn terms<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { - self.rules.iter().flat_map(|rule| rule.terms()).collect() - } - - pub fn find_rule<'b>(&'b self, target: &RuleElem<'a, T>) -> Vec<&'b Rule<'a, T>> { - self.rules - .iter() - .filter(|rule| &rule.lhs == target) - .collect() - } - - pub fn first_set<'b>(&'b self) -> HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>> { - // 1. Calc a null set - let nulls_set = self.nulls_set(); - - // 2. Initialize a first set - let mut first_set: HashMap<&RuleElem, Vec<&RuleElem>> = HashMap::new(); - first_set.insert(&RuleElem::EOF, vec![&RuleElem::EOF]); - self.terms().into_iter().for_each(|relem| { - first_set.insert(relem, vec![relem]); - }); - self.nonterms().into_iter().for_each(|relem| { - first_set.insert(relem, vec![]); - }); - - // 3. List up candidates from a nonterm set - let mut candidates = vec![]; - for nonterm in self.nonterms() { - let rules = self.find_rule(nonterm); - for rule in rules { - for relem in &rule.rhs { - if &rule.lhs != relem { - candidates.push((nonterm, relem)) - } - if !nulls_set.contains(&relem) { - break; - } - } - } - } - - // 4. Find first set with recursive - let mut updated = true; - while updated { - updated = false; - for (nonterm, candidate) in &candidates { - let found_elems: Vec<&RuleElem> = first_set - .get(candidate) - .unwrap() - .iter() - .filter(|relem| !first_set.get(nonterm).unwrap().contains(relem)) - .copied() - .collect(); - updated = !found_elems.is_empty(); - first_set - .get_mut(nonterm) - .unwrap() - .extend(found_elems.into_iter()); - } - } - - first_set - } - - fn nulls_set<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { - // 1. Find null rules - let mut nulls_set: Vec<&RuleElem> = self - .rules - .iter() - .filter(|rule| rule.rhs.is_empty()) - .map(|rule| &rule.lhs) - .collect(); - - // 2. 
Find null rules with recursive - let mut updated = true; - while updated { - updated = false; - for rule in &self.rules { - if nulls_set.contains(&&rule.lhs) { - continue; - } else if rule.rhs.iter().all(|relem| nulls_set.contains(&relem)) { - nulls_set.push(&rule.lhs); - updated = true; - } else { - continue; - } - } - } - - nulls_set - } -} - -#[cfg(test)] -mod test { - use std::collections::HashMap; - - use super::{TokenSet, Syntax, Rule, RuleElem}; - - #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] - enum TestToken { - Num, - Plus, - Minus, - Mul, - Div, - BracketA, - BracketB, - } - - impl TokenSet<'_> for TestToken { - fn into_iter() -> impl Iterator { - Box::new( - vec![ - TestToken::Num, - TestToken::Plus, - TestToken::Minus, - TestToken::Mul, - TestToken::Div, - TestToken::BracketA, - TestToken::BracketB, - ] - .into_iter(), - ) - } - - fn into_regex_str(&self) -> &'static str { - match self { - TestToken::Num => r"^[1-9][0-9]*", - TestToken::Plus => r"^\+", - TestToken::Minus => r"^-", - TestToken::Mul => r"^\*", - TestToken::Div => r"^/", - TestToken::BracketA => r"^\(", - TestToken::BracketB => r"^\)", - } - } - - fn ignore_str() -> &'static str { - r"^[ \t\n]+" - } - } - - #[derive(Debug, Clone, Copy)] - enum TestSyntax { - ExprPlus, - ExprMinus, - Expr2Term, - TermMul, - TermDiv, - Term2Fact, - Fact2Expr, - Fact2Num, - } - - impl<'a> Syntax<'a> for TestSyntax { - type TokenSet = TestToken; - - fn into_iter() -> impl Iterator { - Box::new( - vec![ - TestSyntax::ExprPlus, - TestSyntax::ExprMinus, - TestSyntax::Expr2Term, - TestSyntax::TermMul, - TestSyntax::TermDiv, - TestSyntax::Term2Fact, - TestSyntax::Fact2Expr, - TestSyntax::Fact2Num, - ] - .into_iter(), - ) - } - - fn into_rules(&self) -> Vec> { - let expr_plus = Rule::from(( - RuleElem::new_nonterm("expr"), - vec![ - RuleElem::new_nonterm("expr"), - RuleElem::new_term(TestToken::Plus), - RuleElem::new_nonterm("term"), - ], - )); - - let expr_minus = Rule::from(( - RuleElem::new_nonterm("expr"), - vec![ - RuleElem::new_nonterm("expr"), - RuleElem::new_term(TestToken::Minus), - RuleElem::new_nonterm("term"), - ], - )); - - let expr_2_term = Rule::::from(( - RuleElem::new_nonterm("expr"), - vec![RuleElem::new_nonterm("term")], - )); - - let term_mul = Rule::from(( - RuleElem::new_nonterm("term"), - vec![ - RuleElem::new_nonterm("term"), - RuleElem::new_term(TestToken::Mul), - RuleElem::new_nonterm("fact"), - ], - )); - - let term_div = Rule::from(( - RuleElem::new_nonterm("term"), - vec![ - RuleElem::new_nonterm("term"), - RuleElem::new_term(TestToken::Div), - RuleElem::new_nonterm("fact"), - ], - )); - - let term_2_fact = Rule::::from(( - RuleElem::new_nonterm("term"), - vec![RuleElem::new_nonterm("fact")], - )); - - let fact_2_expr = Rule::from(( - RuleElem::new_nonterm("fact"), - vec![ - RuleElem::new_term(TestToken::BracketA), - RuleElem::new_nonterm("expr"), - RuleElem::new_term(TestToken::BracketB), - ], - )); - - let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![])); - - match self { - TestSyntax::ExprPlus => vec![expr_plus], - TestSyntax::ExprMinus => vec![expr_minus], - TestSyntax::Expr2Term => vec![expr_2_term], - TestSyntax::TermMul => vec![term_mul], - TestSyntax::TermDiv => vec![term_div], - TestSyntax::Term2Fact => vec![term_2_fact], - TestSyntax::Fact2Expr => vec![fact_2_expr], - TestSyntax::Fact2Num => vec![fact_2_num], - } - } - } - - fn check>( - first_set: &HashMap<&RuleElem, Vec<&RuleElem>>, - nonterm: T, - exp_terms: Vec, - ) { - let nonterms = RuleElem::::new_nonterm(nonterm); - let 
exp_terms: Vec> = exp_terms - .into_iter() - .map(|term| RuleElem::new_term(term)) - .collect(); - assert!(first_set.get(&nonterms).unwrap().len() == exp_terms.len()); - - let result = first_set - .get(&nonterms) - .unwrap() - .into_iter() - .zip(exp_terms.into_iter()) - .any(|(a, b)| a == &&b); - assert!(result); - } - - #[test] - fn first_set() { - let ruleset = ::into_ruleset(); - let first_set = ruleset.first_set(); - - check( - &first_set, - "expr", - vec![ - TestToken::Plus, - TestToken::Minus, - TestToken::Mul, - TestToken::Div, - TestToken::BracketA, - ], - ); - check( - &first_set, - "term", - vec![TestToken::Mul, TestToken::Div, TestToken::BracketA], - ); - check(&first_set, "fact", vec![TestToken::BracketA]); - } -} diff --git a/crates/core/src/cfg/token.rs b/crates/core/src/cfg/token.rs deleted file mode 100644 index e3a6d3f..0000000 --- a/crates/core/src/cfg/token.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::fmt::Debug; -use std::hash::Hash; - -use regex::{Regex, RegexSet}; - -pub trait TokenSet<'a> -where - Self: Debug + Copy + Clone + Hash + Eq, -{ - fn ignore_str() -> &'a str; - fn into_iter() -> impl Iterator; - fn into_regex_str(&self) -> &'a str; - - fn into_regex(&self) -> anyhow::Result { - Ok(Regex::new(self.into_regex_str())?) - } - - fn try_into_regexset() -> anyhow::Result { - let regex_set = Self::into_iter() - .map(|token| Self::into_regex_str(&token)) - .collect::>(); - - Ok(RegexSet::new(regex_set)?) - } -} diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index 7f559fc..0c63504 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -4,8 +4,7 @@ use std::fmt::Display; use thiserror::Error; -use crate::cfg::TokenSet; -use crate::lex::Token; +use copager_cfg::token::{TokenTag, Token}; #[derive(Debug, Error)] pub struct ParseError { @@ -16,7 +15,36 @@ pub struct ParseError { impl Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.err) + fn pretty_print( + f: &mut std::fmt::Formatter<'_>, + input: &str, + pos: (usize, usize) + ) -> std::fmt::Result { + writeln!(f, "-----")?; + + let (row, col) = (pos.0 as i32 - 1, pos.1 as i32 - 1); + let lines = input.split('\n'); + let neighbor_lines = lines + .skip(max(0, row - 2) as usize) + .take(min(row + 1, 3) as usize); + + for (idx, line) in neighbor_lines.enumerate() { + let row = max(1, row - 1) + (idx as i32); + writeln!(f, "{:2}: {}", row, line)?; + } + + writeln!(f, " {}^ here", " ".repeat(col as usize))?; + writeln!(f, "Found at line {}, column {}.", row + 1, col + 1)?; + writeln!(f, "-----") + } + + writeln!(f, "{}", self.err)?; + match (&self.src, self.pos) { + (Some(src), Some(pos)) => pretty_print(f, &src, pos)?, + _ => {}, + } + + Ok(()) } } @@ -32,7 +60,7 @@ impl ParseError { } } - pub fn with<'a, T: TokenSet<'a>>(self, token: Token<'a, '_, T>) -> ParseError { + pub fn with<'input, T: TokenTag>(self, token: Token<'input, T>) -> ParseError { let mut sum = 0; let (mut rows, mut cols) = (1, 1); for c in token.src.chars() { @@ -58,32 +86,4 @@ impl ParseError { pos: Some((rows, cols)), } } - - pub fn pretty_print(&self) { - let pretty_printer = |input: &str, pos: (usize, usize)| { - eprintln!("-----"); - - let (row, col) = (pos.0 as i32 - 1, pos.1 as i32 - 1); - let lines = input.split('\n'); - let neighbor_lines = lines - .skip(max(0, row - 2) as usize) - .take(min(row + 1, 3) as usize); - - neighbor_lines.enumerate().for_each(|(idx, line)| { - let row = max(1, row - 1) + (idx as i32); - println!("{:2}: {}", row, line); 
- }); - - eprintln!(" {}^ here", " ".repeat(col as usize)); - eprintln!("Error at line {}, column {}.", row + 1, col + 1); - eprintln!("-----\n"); - }; - - match (&self.src, self.pos) { - (Some(src), Some(pos)) => { - pretty_printer(&src, pos); - } - _ => {}, - } - } } diff --git a/crates/core/src/lex.rs b/crates/core/src/lex.rs deleted file mode 100644 index 240da9f..0000000 --- a/crates/core/src/lex.rs +++ /dev/null @@ -1,184 +0,0 @@ -use std::marker::PhantomData; - -use regex::{Regex, RegexSet}; - -use crate::cfg::TokenSet; - -#[derive(Debug, Copy, Clone)] -pub struct Token<'a, 'b, T: TokenSet<'a>> { - pub kind: T, - pub src: &'b str, - pub range: (usize, usize), - tokenset: PhantomData<&'a T>, -} - -impl<'a, 'b, T: TokenSet<'a>> Token<'a, 'b, T> { - pub fn new(kind: T, src: &'b str, range: (usize, usize)) -> Self { - Token { - kind, - src, - range, - tokenset: PhantomData, - } - } - - pub fn as_str(&self) -> &'b str { - let (l, r) = self.range; - &self.src[l..r] - } -} - -pub(crate) struct Lexer; - -impl Lexer { - pub fn new<'a, 'b, T>(input: &'b str) -> anyhow::Result>> - where - T: TokenSet<'a> + 'a, - { - LexDriver::<'a, 'b, T>::try_from(input) - } -} - -struct LexDriver<'a, 'b, T: TokenSet<'a>> { - // Regex - regex_istr: Regex, - regex_set: RegexSet, - regex_map: Vec<(Regex, T)>, - - // State - input: &'b str, - pos: usize, - - // PhantomData - tokenset: PhantomData<&'a T>, -} - -impl<'a, 'b, T: TokenSet<'a>> TryFrom<&'b str> for LexDriver<'a, 'b, T> { - type Error = anyhow::Error; - - fn try_from(input: &'b str) -> anyhow::Result { - let regex_istr = Regex::new(T::ignore_str())?; - let regex_set = T::try_into_regexset()?; - let regex_map = T::into_iter() - .map(|token| Ok((token.into_regex()?, token))) - .collect::>>()?; - - Ok(LexDriver { - regex_istr, - regex_set, - regex_map, - input, - pos: 0, - tokenset: PhantomData, - }) - } -} - -impl<'a, 'b, T: TokenSet<'a> + 'a> Iterator for LexDriver<'a, 'b, T> { - type Item = Token<'a, 'b, T>; - - fn next(&mut self) -> Option { - // Skip Spaces - let remain = match self.regex_istr.find(&self.input[self.pos..]) { - Some(acc_s) => { - self.pos += acc_s.len(); - &self.input[self.pos..] - } - None => &self.input[self.pos..] 
- }; - - // Find the token - let mut matches = self - .regex_set - .matches(remain) - .into_iter() - .map(|idx| &self.regex_map[idx]) - .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str())) - .collect::>(); - matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); - - // Update myself - let (token, acc_s) = matches.first()?; - let range = (self.pos, self.pos + acc_s.len()); - self.pos += acc_s.len(); - - Some(Token::new(*token, &self.input, range)) - } -} - -#[cfg(test)] -mod test { - use serde::{Deserialize, Serialize}; - - use crate::cfg::TokenSet; - use super::Lexer; - - #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)] - enum TestToken { - Num, - Plus, - } - - impl TokenSet<'_> for TestToken { - fn ignore_str() -> &'static str { - r"^[ \t\n]+" - } - - fn into_iter() -> impl Iterator { - vec![TestToken::Num, TestToken::Plus].into_iter() - } - - fn into_regex_str(&self) -> &'static str { - match self { - TestToken::Num => r"^[1-9][0-9]*", - TestToken::Plus => r"^\+", - } - } - } - - fn check<'a, 'b>( - expected: &Vec<(TestToken, &'b str, (usize, usize))>, - input: &'b str, - ) -> bool { - Lexer::new::(input) - .unwrap() - .into_iter() - .zip(expected.iter()) - .all(|(a, b)| { - a.kind == b.0 && a.range == b.2 && a.as_str() == b.1 - }) - } - - #[test] - fn input_ok_1() { - let expected = vec![ - (TestToken::Num, "10", (0, 2)), - (TestToken::Plus, "+", (2, 3)), - (TestToken::Num, "20", (3, 5)), - ]; - let input = "10+20"; - assert!(check(&expected, input)); - } - - #[test] - fn input_ok_2() { - let expected = vec![ - (TestToken::Num, "10", (12, 14)), - (TestToken::Plus, "+", (15, 16)), - (TestToken::Num, "20", (23, 25)), - ]; - let input = " 10 +\n 20 "; - assert!(check(&expected, input)); - } - - #[test] - fn input_ok_3() { - let expected = vec![ - (TestToken::Num, "10", (12, 14)), - (TestToken::Plus, "+", (15, 16)), - (TestToken::Num, "20", (23, 25)), - ]; - let input = " 10 +\n 20ffff30 - 40 * 50"; - assert!(check(&expected, input)); - } -} diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 78bc5b5..a7571f7 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -1,41 +1,189 @@ -pub mod cfg; pub mod error; -pub mod parse; -pub mod lex; use std::marker::PhantomData; use serde::{Serialize, Deserialize}; +use serde_cbor::ser::to_vec_packed; +use serde_cbor::de::from_slice; -use lex::Lexer; -use parse::{ParserImpl, SExp}; +use copager_lex::{LexSource, LexDriver}; +use copager_parse::{ParseSource, ParseDriver, ParseEvent}; +use copager_ir::{IR, IRBuilder}; +use copager_utils::cache::Cacheable; + +pub trait GrammarDesign { + type Lex: LexSource; + type Parse: ParseSource<::Tag>; +} + +pub struct Grammar +where + Sl: LexSource, + Sp: ParseSource, +{ + _phantom_sl: PhantomData, + _phantom_sp: PhantomData, +} + +impl GrammarDesign for Grammar +where + Sl: LexSource, + Sp: ParseSource, +{ + type Lex = Sl; + type Parse = Sp; +} #[derive(Debug, Serialize, Deserialize)] -pub struct Parser<'a, Algorithm> +pub struct Processor +where + G: GrammarDesign, + Dl: LexDriver, + Dp: ParseDriver, +{ + // Cache + cache_lex: Option>, + cache_parse: Option>, + + // Driver + #[serde(skip, default="Option::default")] + lexer: Option
, + #[serde(skip, default="Option::default")] + parser: Option, + + // Phantom + #[serde(skip)] + _phantom_g: PhantomData, + #[serde(skip)] + _phantom_dl: PhantomData
, + #[serde(skip)] + _phantom_dp: PhantomData, +} + +impl Processor +where + G: GrammarDesign, + Dl: LexDriver, + Dp: ParseDriver, +{ + pub fn new() -> Self { + Processor { + cache_lex: None, + cache_parse: None, + lexer: None, + parser: None, + _phantom_g: PhantomData, + _phantom_dl: PhantomData, + _phantom_dp: PhantomData, + } + } + + pub fn build_lexer(self) -> anyhow::Result + where + G::Lex: Default, + { + self.build_lexer_by(G::Lex::default()) + } + + pub fn build_lexer_by(mut self, source: G::Lex) -> anyhow::Result { + let lexer = Dl::try_from(source)?; + self.lexer = Some(lexer); + + Ok(self) + } + + pub fn build_parser(self) -> anyhow::Result + where + G::Lex: Default, + G::Parse: Default, + { + self.build_parser_by((G::Lex::default(), G::Parse::default())) + } + + pub fn build_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result { + let parser = Dp::try_from(source)?; + self.parser = Some(parser); + + Ok(self) + } + + pub fn process<'input, I>(&self, input: &'input str) -> anyhow::Result + where + I: IR<'input, G::Lex, G::Parse>, + { + let lexer = self.lexer.as_ref().unwrap(); + let parser = self.parser.as_ref().unwrap(); + + let mut ir_builder = I::Builder::new(); + for result in parser.run(lexer.run(input)) { + match result { + ParseEvent::Read(token) => ir_builder.on_read(token)?, + ParseEvent::Parse{ rule,len } => ir_builder.on_parse(rule, len)?, + ParseEvent::Err(err) => return Err(err), + } + } + + ir_builder.build() + } +} + +impl Processor where - Algorithm: ParserImpl<'a>, + G: GrammarDesign, + Dl: LexDriver + Cacheable, + Dp: ParseDriver, { - r#impl: Algorithm, - phantom: PhantomData<&'a ()>, + pub fn prebuild_lexer(self) -> anyhow::Result + where + G::Lex: Default, + { + self.prebuild_lexer_by(G::Lex::default()) + } + + pub fn prebuild_lexer_by(mut self, source: G::Lex) -> anyhow::Result { + let cache_lex = Dl::new(source)?; + self.cache_lex = Some(to_vec_packed(&cache_lex)?); + + Ok(self) + } + + pub fn build_lexer_by_cache(mut self) -> Self { + let cache_lex = self.cache_lex.as_ref().unwrap(); + let cache_lex = from_slice(cache_lex); + let lexer = Dl::restore(cache_lex.unwrap()); + self.lexer = Some(lexer); + + self + } } -#[allow(clippy::new_without_default)] -impl<'a, Algorithm> Parser<'a, Algorithm> +impl Processor where - Algorithm: ParserImpl<'a>, + G: GrammarDesign, + Dl: LexDriver, + Dp: ParseDriver + Cacheable<(G::Lex, G::Parse)>, { - pub fn new() -> anyhow::Result> { - Ok(Parser { - r#impl: Algorithm::setup()?, - phantom: PhantomData, - }) - } - - pub fn parse<'b>( - &self, - input: &'b str, - ) -> anyhow::Result> { - let lexer = Lexer::new::(input)?; - self.r#impl.parse(lexer) + pub fn prebuild_parser(self) -> anyhow::Result + where + G::Lex: Default, + G::Parse: Default, + { + self.prebuild_parser_by((G::Lex::default(), G::Parse::default())) + } + + pub fn prebuild_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result { + let cache_parse = Dp::new(source)?; + self.cache_parse = Some(to_vec_packed(&cache_parse)?); + + Ok(self) + } + + pub fn build_parser_by_cache(mut self) -> Self { + let cache_parse = self.cache_parse.as_ref().unwrap(); + let cache_parse = from_slice(cache_parse); + let parser = Dp::restore(cache_parse.unwrap()); + self.parser = Some(parser); + + self } } diff --git a/crates/core/src/parse.rs b/crates/core/src/parse.rs deleted file mode 100644 index 8a2d4df..0000000 --- a/crates/core/src/parse.rs +++ /dev/null @@ -1,86 +0,0 @@ -use std::fmt::{Display, Debug}; - -use crate::cfg::{TokenSet, Syntax}; -use 
crate::lex::Token; - -pub trait ParserImpl<'a> -where - Self: Sized, -{ - type TokenSet: TokenSet<'a> + 'a; - type Syntax: Syntax<'a, TokenSet = Self::TokenSet>; - - fn setup() -> anyhow::Result; - fn parse<'b>( - &self, - lexer: impl Iterator>, - ) -> anyhow::Result>; -} - -#[derive(Debug)] -pub enum SExp<'a, 'b, T, S> -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, -{ - List { - tag: S, - elems: Vec>, - }, - Atom(Token<'a, 'b, T>), -} - -impl<'a, T, S> Display for SExp<'a, '_, T, S> -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T> + Debug, -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SExp::List { tag, elems } => { - write!(f, "({:?}", tag)?; - for elem in elems { - write!(f, " {}", elem)?; - } - write!(f, ")") - } - SExp::Atom(token) => write!(f, "{:?}", token.as_str()), - } - } -} - -#[derive(Debug)] -pub struct SExpBuilder<'a, 'b, T, S> -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, -{ - stack: Vec>, -} - -impl<'a, 'b, T, S> SExpBuilder<'a, 'b, T, S> -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, -{ - pub fn new() -> SExpBuilder<'a, 'b, T, S> { - SExpBuilder { stack: vec![] } - } - - pub fn push(&mut self, token: Token<'a, 'b, T>) { - self.stack.push(SExp::Atom(token)); - } - - pub fn wrap(&mut self, tag: S, cnt: usize) { - let elems = self.stack.split_off(self.stack.len() - cnt); - self.stack.push(SExp::List { tag, elems }); - } - - pub fn build(mut self) -> anyhow::Result> { - if self.stack.len() == 1 { - Ok(self.stack.pop().unwrap()) - } else { - Err(anyhow::anyhow!("Invalid S-Expression")) - } - } -} diff --git a/crates/core/tests/prebuild.rs b/crates/core/tests/prebuild.rs new file mode 100644 index 0000000..af7db01 --- /dev/null +++ b/crates/core/tests/prebuild.rs @@ -0,0 +1,86 @@ +use serde::{Serialize, Deserialize}; +use serde_cbor::ser::to_vec_packed; +use serde_cbor::de::from_slice; + +use copager_core::{Grammar, Processor}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::LexSource; +use copager_lex_regex::RegexLexer; +use copager_parse::ParseSource; +use copager_parse_lr1::LR1; +use copager_ir_void::Void; + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + LexSource, Serialize, Deserialize +)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + ParseSource, Serialize, Deserialize +)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyGrammar = Grammar; +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyProcessor = Processor; + +#[test] +fn prebuild() -> anyhow::Result<()> { + // in build.rs + let prebuiled_processor = build_rs()?; + let serialized = to_vec_packed(&prebuiled_processor)?; + + // in main.rs + let deserialized: MyProcessor = from_slice(&serialized)?; + main_rs(deserialized)?; + + Ok(()) +} + +fn build_rs() -> anyhow::Result { + MyProcessor::new().prebuild_parser() +} + +fn main_rs(processor: MyProcessor) -> anyhow::Result<()> { + processor + 
.build_lexer()? + .build_parser_by_cache() + .process::("1 + 2 * 3")?; + + Ok(()) +} diff --git a/crates/core/tests/simple.rs b/crates/core/tests/simple.rs new file mode 100644 index 0000000..4a62fd6 --- /dev/null +++ b/crates/core/tests/simple.rs @@ -0,0 +1,68 @@ +use serde::{Serialize, Deserialize}; + +use copager_core::{Grammar, Processor}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::LexSource; +use copager_lex_regex::RegexLexer; +use copager_parse::ParseSource; +use copager_parse_lr1::LR1; +use copager_ir_void::Void; + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + LexSource, Serialize, Deserialize +)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + ParseSource, Serialize, Deserialize +)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyGrammar = Grammar; +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyProcessor = Processor; + +#[test] +fn simple_success() -> anyhow::Result<()> { + MyProcessor::new() + .build_lexer()? + .build_parser()? + .process::("1 + 2 * 3")?; + + Ok(()) +} diff --git a/crates/core/tests/simple_multiple.rs b/crates/core/tests/simple_multiple.rs new file mode 100644 index 0000000..5e8ebc0 --- /dev/null +++ b/crates/core/tests/simple_multiple.rs @@ -0,0 +1,119 @@ +use serde::{Serialize, Deserialize}; + +use copager_core::{Grammar, Processor}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::LexSource; +use copager_lex_regex::RegexLexer; +use copager_parse::ParseSource; +use copager_parse_lr1::LR1; +use copager_ir_void::Void; + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + LexSource, Serialize, Deserialize +)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + ParseSource, Serialize, Deserialize +)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyGrammar = Grammar; +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyProcessor = Processor; + +const OK_INPUTS: [&str; 7] = [ + "1 + 2", + "1 + 2 * 3", + "1 + 2 * 3 / 4", + "1 + 2 * (3 / 4)", + "1 + 2 * (3 / 4) - 5", + "1 + 2 * (3 / 4) - 5 * 6", + "(1 + 2) * ((3 / 4) - 5 * 6 / 7)", +]; + +const ERR_INPUTS: [&str; 7] = [ + "1 +", + "1 + 2 *", + "1 + 2 * 3 /", + "1 + 2 * (3 /", + "1 + 2 * (3 / 4", + "1 + 2 * (3 / 4) -", + "(1 + 2) * ((3 / 4) - 5 * 6 /", +]; + +#[test] +fn simple_multiple_only_success() { + let processor = gen_processor(); + for input in OK_INPUTS { 
+ assert!(processor.process::(input).is_ok()); + } +} + +#[test] +fn simple_multiple_only_failure() { + let processor = gen_processor(); + for input in ERR_INPUTS { + assert!(processor.process::(input).is_err()); + } +} + +#[test] +fn simple_multiple_mix_success_and_failure() { + let mixed_testcases = OK_INPUTS + .iter() + .zip(ERR_INPUTS.iter()) + .flat_map(|(ok, err)| vec![(true, ok), (false, err)]); + + let processor = gen_processor(); + for (is_ok, input) in mixed_testcases { + if is_ok { + assert!(processor.process::(input).is_ok()); + } else { + assert!(processor.process::(input).is_err()); + } + } +} + +fn gen_processor() -> MyProcessor { + MyProcessor::new() + .build_lexer() + .unwrap() + .build_parser() + .unwrap() +} diff --git a/crates/core_derive/src/impl.rs b/crates/core_derive/src/impl.rs deleted file mode 100644 index dd500e0..0000000 --- a/crates/core_derive/src/impl.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod tokenset; -pub mod syntax; diff --git a/crates/core_derive/src/impl/syntax.rs b/crates/core_derive/src/impl/syntax.rs deleted file mode 100644 index 83324ce..0000000 --- a/crates/core_derive/src/impl/syntax.rs +++ /dev/null @@ -1,112 +0,0 @@ -use proc_macro2::TokenStream; -use quote::quote; -use syn::{Data, DeriveInput, Variant, Ident, LitStr}; - -pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream { - let data_enum = if let Data::Enum(data_enum) = ast.data { - data_enum - } else { - panic!("\"Syntax\" proc-macro is only implemented for enum.") - }; - - let parsed_variantes = data_enum - .variants - .iter() - .map(|variant| VariantInfo::parse(&ast.ident, variant)) - .collect::>(); - - let enum_name = &ast.ident; - let enum_assoc_type = format!("{}", enum_name) - .replace("Syntax", "TokenSet") - .parse::() - .unwrap(); - let enum_variants = parsed_variantes - .iter() - .map(|variant| variant.gen_ident()); - let enum_rule_table = parsed_variantes - .iter() - .map(|variant| variant.gen_ident_with_rule()); - - quote! { - impl<'a> Syntax<'a> for #enum_name { - type TokenSet = #enum_assoc_type; - - fn into_iter() -> impl Iterator { - vec![ - #( #enum_variants, )* - ].into_iter() - } - - fn into_rules(&self) -> Vec> { - match self { - #( #enum_rule_table, )* - _ => unimplemented!(), - } - } - } - } -} - -struct VariantInfo<'a> { - parent_ident: &'a Ident, - self_ident: &'a Ident, - rules: Vec, -} - -impl<'a> VariantInfo<'a> { - fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> { - let self_ident = &variant.ident; - - let mut rules = vec![]; - for attr in &variant.attrs { - let attr = attr.parse_args::().unwrap().value(); - rules.push(Self::parse_rule(&attr)); - } - - VariantInfo { - parent_ident, - self_ident, - rules, - } - } - - fn parse_rule(s: &str) -> TokenStream { - let mut splitted = s.split("::="); - - let lhs = splitted.next().unwrap().trim(); - let lhs = &lhs[1..lhs.len() - 1]; - let lhs = quote! { RuleElem::new_nonterm(#lhs) }; - - let rhs = splitted.collect::() - .split_whitespace() - .map(|s| { - if s.starts_with('<') { - let s = &s[1..s.len() - 1]; - quote! { RuleElem::new_nonterm(#s) } - } else { - let ident = s.parse::().unwrap(); - quote! { RuleElem::new_term(Self::TokenSet::#ident) } - } - }) - .collect::>(); - - quote! { Rule::from((#lhs, vec![ #( #rhs, )* ])) } - } - - fn gen_ident(&self) -> TokenStream { - let parent_ident = self.parent_ident; - let self_ident = self.self_ident; - - quote! 
{ #parent_ident :: #self_ident } - } - - fn gen_ident_with_rule(&self) -> TokenStream { - let ident = self.gen_ident(); - if self.rules.is_empty() { - quote! { #ident => unimplemented!() } - } else { - let rules = &self.rules; - quote! { #ident => vec![#(#rules),*] } - } - } -} diff --git a/crates/core_derive/src/lib.rs b/crates/core_derive/src/lib.rs deleted file mode 100644 index 9bdfd72..0000000 --- a/crates/core_derive/src/lib.rs +++ /dev/null @@ -1,15 +0,0 @@ -mod r#impl; - -use syn::{parse_macro_input, DeriveInput}; - -#[proc_macro_derive(TokenSet, attributes(token))] -pub fn derive_tokenset(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let ast = parse_macro_input!(input as DeriveInput); - r#impl::tokenset::proc_macro_impl(ast).into() -} - -#[proc_macro_derive(Syntax, attributes(rule))] -pub fn derive_syntax(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let ast = parse_macro_input!(input as DeriveInput); - r#impl::syntax::syntax_proc_macro_impl(ast).into() -} diff --git a/crates/core_derive/Cargo.toml b/crates/core_macros/Cargo.toml similarity index 82% rename from crates/core_derive/Cargo.toml rename to crates/core_macros/Cargo.toml index 8516b8c..d33518c 100644 --- a/crates/core_derive/Cargo.toml +++ b/crates/core_macros/Cargo.toml @@ -1,6 +1,6 @@ [package] -name = "core_derive" -version = "0.1.1" +name = "copager_core_macros" +version = "0.2.0" edition = "2021" [dependencies] diff --git a/crates/core_macros/src/impl.rs b/crates/core_macros/src/impl.rs new file mode 100644 index 0000000..93d0603 --- /dev/null +++ b/crates/core_macros/src/impl.rs @@ -0,0 +1,2 @@ +pub(crate) mod prebuild; +pub(crate) mod load; diff --git a/crates/core_macros/src/impl/load.rs b/crates/core_macros/src/impl/load.rs new file mode 100644 index 0000000..87779a6 --- /dev/null +++ b/crates/core_macros/src/impl/load.rs @@ -0,0 +1,23 @@ +use proc_macro2::TokenStream; +use quote::quote; +use syn::ItemFn; + +pub fn proc_macro_impl_load(_args: TokenStream, ast: ItemFn) -> TokenStream { + let fn_visibility = ast.vis; + let fn_ident = ast.sig.ident; + let fn_args = ast.sig.inputs; + let fn_ret_type = ast.sig.output; + let fn_body = ast.block; + + quote! { + fn #fn_ident () #fn_ret_type { + #fn_visibility fn __inner (#fn_args) #fn_ret_type { + #fn_body + } + + let cache_body = include_str!(concat!(env!("OUT_DIR"), "/MyProcessor.cache")); + let deserialized = copager::prebuild::deserialize(&cache_body).unwrap(); + __inner(deserialized) + } + } +} diff --git a/crates/core_macros/src/impl/prebuild.rs b/crates/core_macros/src/impl/prebuild.rs new file mode 100644 index 0000000..227bc6c --- /dev/null +++ b/crates/core_macros/src/impl/prebuild.rs @@ -0,0 +1,24 @@ +use proc_macro2::TokenStream; +use quote::quote; +use syn::ItemFn; + +pub fn proc_macro_impl_prebuild(_args: TokenStream, ast: ItemFn) -> TokenStream { + let fn_visibility = ast.vis; + let fn_ident = ast.sig.ident; + let fn_args = ast.sig.inputs; + let fn_ret_type = ast.sig.output; + let fn_body = ast.block; + + quote! 
{ + fn #fn_ident () { + #fn_visibility fn __inner (#fn_args) #fn_ret_type { + #fn_body + } + + let serialized = copager::prebuild::serialize(&__inner()).unwrap(); + let out_dir = std::env::var_os("OUT_DIR").unwrap(); + let cache_path = std::path::Path::new(&out_dir).join("MyProcessor.cache"); + std::fs::write(cache_path, serialized).unwrap(); + } + } +} diff --git a/crates/core_macros/src/lib.rs b/crates/core_macros/src/lib.rs new file mode 100644 index 0000000..b9cf84d --- /dev/null +++ b/crates/core_macros/src/lib.rs @@ -0,0 +1,24 @@ +mod r#impl; + +use proc_macro2::TokenStream; +use syn::{parse_macro_input, ItemFn}; + +#[proc_macro_attribute] +pub fn prebuild( + attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + let args: TokenStream = attr.into(); + let ast = parse_macro_input!(item as ItemFn); + r#impl::prebuild::proc_macro_impl_prebuild(args, ast).into() +} + +#[proc_macro_attribute] +pub fn load( + attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + let args: TokenStream = attr.into(); + let ast = parse_macro_input!(item as ItemFn); + r#impl::load::proc_macro_impl_load(args, ast).into() +} diff --git a/crates/ir/Cargo.toml b/crates/ir/Cargo.toml new file mode 100644 index 0000000..c05bab9 --- /dev/null +++ b/crates/ir/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "copager_ir" +version = "0.2.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs new file mode 100644 index 0000000..a45b3a5 --- /dev/null +++ b/crates/ir/src/lib.rs @@ -0,0 +1,24 @@ +use copager_cfg::token::Token; +use copager_lex::LexSource; +use copager_parse::ParseSource; + +pub trait IR<'input, Sl, Sp> +where + Sl: LexSource, + Sp: ParseSource, +{ + type Builder: IRBuilder<'input, Sl, Sp, Output = Self>; +} + +pub trait IRBuilder<'input, Sl, Sp> +where + Sl: LexSource, + Sp: ParseSource, +{ + type Output: IR<'input, Sl, Sp>; + + fn new() -> Self; + fn on_read(&mut self, token: Token<'input, Sl::Tag>) -> anyhow::Result<()>; + fn on_parse(&mut self, rule: Sp::Tag, len: usize) -> anyhow::Result<()>; + fn build(self) -> anyhow::Result; +} diff --git a/crates/ir_sexp/Cargo.toml b/crates/ir_sexp/Cargo.toml new file mode 100644 index 0000000..b3e15ca --- /dev/null +++ b/crates/ir_sexp/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "copager_ir_sexp" +version = "0.2.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } +copager_ir = { path = "../ir" } + +[dev-dependencies] +copager_lex = { path = "../lex", features = ["derive"] } +copager_lex_regex = { path = "../lex_regex" } +copager_parse = { path = "../parse", features = ["derive"] } +copager_parse_lr1 = { path = "../parse_lr1" } +copager_ir_sexp = { path = "." 
} diff --git a/crates/ir_sexp/src/lib.rs b/crates/ir_sexp/src/lib.rs new file mode 100644 index 0000000..a24f3c6 --- /dev/null +++ b/crates/ir_sexp/src/lib.rs @@ -0,0 +1,89 @@ +use std::fmt::{Debug, Display}; + +use copager_cfg::token::Token; +use copager_lex::LexSource; +use copager_parse::ParseSource; +use copager_ir::{IR, IRBuilder}; + +#[derive(Debug)] +pub enum SExp<'input, Sl, Sp> +where + Sl: LexSource, + Sp: ParseSource, +{ + List { + rule: Sp::Tag, + elems: Vec>, + }, + Atom(Token<'input, Sl::Tag>), +} + +impl Display for SExp<'_, Sl, Sp> +where + Sl: LexSource, + Sp: ParseSource, + Sp::Tag: Debug, + Sl::Tag: Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SExp::List { rule, elems } => { + write!(f, "({:?}", rule)?; + for elem in elems { + write!(f, " {}", elem)?; + } + write!(f, ")") + } + SExp::Atom(token) => write!(f, "{:?}", token.as_str()), + } + } +} + +impl<'input, Sl, Sp> IR<'input, Sl, Sp> for SExp<'input, Sl, Sp> +where + Sl: LexSource, + Sp: ParseSource, +{ + type Builder = SExpBuilder<'input, Sl, Sp>; +} + +#[derive(Debug)] +pub struct SExpBuilder<'input, Sl, Sp> +where + Sl: LexSource, + Sp: ParseSource, +{ + stack: Vec>, +} + + +impl <'input, Sl, Sp> IRBuilder<'input, Sl, Sp> for SExpBuilder<'input, Sl, Sp> +where + Sl: LexSource, + Sp: ParseSource, +{ + type Output = SExp<'input, Sl, Sp>; + + fn new() -> SExpBuilder<'input, Sl, Sp> { + SExpBuilder { stack: vec![] } + } + + fn on_read(&mut self, token: Token<'input, Sl::Tag>) -> anyhow::Result<()> { + self.stack.push(SExp::Atom(token)); + Ok(()) + } + + fn on_parse(&mut self, rule: Sp::Tag, len: usize) -> anyhow::Result<()> { + let elems = self.stack.split_off(self.stack.len() - len); + self.stack.push(SExp::List { rule, elems }); + Ok(()) + } + + fn build(mut self) -> anyhow::Result { + if self.stack.len() == 1 { + Ok(self.stack.pop().unwrap()) + } else { + Err(anyhow::anyhow!("Invalid S-Expression")) + } + } +} diff --git a/crates/ir_sexp/tests/simple.rs b/crates/ir_sexp/tests/simple.rs new file mode 100644 index 0000000..0f42f78 --- /dev/null +++ b/crates/ir_sexp/tests/simple.rs @@ -0,0 +1,138 @@ +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::{LexSource, LexDriver}; +use copager_lex_regex::RegexLexer; +use copager_parse::{ParseSource, ParseDriver, ParseEvent}; +use copager_parse_lr1::LR1; +use copager_ir::{IR, IRBuilder}; +use copager_ir_sexp::SExp; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyIR = SExp<'static, ExprToken, ExprRule>; + +#[test] +fn simple_display() { + let ir = parse("1"); + assert!(ir.is_ok()); + assert_eq!(ir.unwrap().to_string(), r#"(Expr (Term (Num "1")))"#); + + let ir = parse("1 + 1"); + assert!(ir.is_ok()); + assert_eq!(ir.unwrap().to_string(), r#"(Expr 
(Expr (Term (Num "1"))) "+" (Term (Num "1")))"#); +} + +#[test] +fn simple_eval() { + assert_eq!(eval(&parse("1").unwrap()), 1); + assert_eq!(eval(&parse("1 + 2").unwrap()), 3); + assert_eq!(eval(&parse("1 + 2 * 3").unwrap()), 7); + assert_eq!(eval(&parse("(1 + 2) * 3").unwrap()), 9); +} + +fn parse<'input>(input: &'input str) -> anyhow::Result> { + let source = ExprToken::default(); + let lexer = >::try_from(source).unwrap(); + + let source = (ExprToken::default(), ExprRule::default()); + let parser = >::try_from(source).unwrap(); + + let mut ir_builder = >::Builder::new(); + for event in parser.run(lexer.run(input)) { + match event { + ParseEvent::Read(token) => { + ir_builder.on_read(token).unwrap(); + } + ParseEvent::Parse { rule, len } => { + ir_builder.on_parse(rule, len).unwrap(); + } + ParseEvent::Err(err) => { + return Err(anyhow::anyhow!("{:?}", err)); + } + } + } + + ir_builder.build() +} + +fn eval(ir: &SExp<'static, ExprToken, ExprRule>) -> i32 { + macro_rules! match_atom { + ($term:expr, $($kind:pat => $block:expr),* $(,)?) => { + match $term { + SExp::Atom(token) => { + match token.kind { + $($kind => $block,)* + _ => unreachable!(), + } + } + _ => unreachable!(), + } + } + } + + match ir { + SExp::List { rule, elems } => { + match rule { + ExprRule::Expr if elems.len() == 1 => eval(&elems[0]), + ExprRule::Expr => { + let lhs = eval(&elems[0]); + let rhs = eval(&elems[2]); + match_atom!(elems[1], + ExprToken::Plus => lhs + rhs, + ExprToken::Minus => lhs - rhs, + ) + } + ExprRule::Term if elems.len() == 1 => eval(&elems[0]), + ExprRule::Term => { + let lhs = eval(&elems[0]); + let rhs = eval(&elems[2]); + match_atom!(elems[1], + ExprToken::Mul => lhs * rhs, + ExprToken::Div => lhs / rhs, + ) + } + ExprRule::Num if elems.len() == 1 => eval(&elems[0]), + ExprRule::Num => eval(&elems[1]), + + } + } + SExp::Atom(token) => token.as_str().parse().unwrap(), + } +} diff --git a/crates/ir_void/Cargo.toml b/crates/ir_void/Cargo.toml new file mode 100644 index 0000000..fcc2038 --- /dev/null +++ b/crates/ir_void/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "copager_ir_void" +version = "0.2.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } +copager_ir = { path = "../ir" } diff --git a/crates/ir_void/src/lib.rs b/crates/ir_void/src/lib.rs new file mode 100644 index 0000000..e776042 --- /dev/null +++ b/crates/ir_void/src/lib.rs @@ -0,0 +1,41 @@ +use std::fmt::Debug; + +use copager_cfg::token::Token; +use copager_lex::LexSource; +use copager_parse::ParseSource; +use copager_ir::{IR, IRBuilder}; + +#[derive(Debug)] +pub struct Void; + +impl<'input, Sl, Sp> IR<'input, Sl, Sp> for Void +where + Sl: LexSource, + Sp: ParseSource, +{ + type Builder = Self; +} + +impl <'input, Sl, Sp> IRBuilder<'input, Sl, Sp> for Void +where + Sl: LexSource, + Sp: ParseSource, +{ + type Output = Self; + + fn new() -> Void { + Void + } + + fn on_read(&mut self, _: Token<'input, Sl::Tag>) -> anyhow::Result<()> { + Ok(()) + } + + fn on_parse(&mut self, _: Sp::Tag, _: usize) -> anyhow::Result<()> { + Ok(()) + } + + fn build(self) -> anyhow::Result { + Ok(Void) + } +} diff --git a/crates/lex/Cargo.toml b/crates/lex/Cargo.toml new file mode 100644 index 0000000..df17694 --- /dev/null +++ b/crates/lex/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "copager_lex" +version = "0.2.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = 
{ workspace = true } +copager_cfg = { path = "../cfg" } +copager_utils = { path = "../utils" } +copager_lex_derive = { path = "../lex_derive", optional = true } + +[features] +default = [] +derive = ["copager_lex_derive"] diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs new file mode 100644 index 0000000..76bd97f --- /dev/null +++ b/crates/lex/src/lib.rs @@ -0,0 +1,19 @@ +use copager_cfg::token::{TokenTag, Token}; +#[cfg(feature = "derive")] +pub use copager_lex_derive::LexSource; + +pub trait LexSource { + type Tag: TokenTag; + + fn ignore_token(&self) -> &str; + fn iter(&self) -> impl Iterator; +} + +pub trait LexDriver +where + Self: Sized, + S: LexSource, +{ + fn try_from(source: S) -> anyhow::Result; + fn run<'input>(&self, input: &'input str) -> impl Iterator>; +} diff --git a/crates/lex_derive/Cargo.toml b/crates/lex_derive/Cargo.toml new file mode 100644 index 0000000..6c9cabd --- /dev/null +++ b/crates/lex_derive/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "copager_lex_derive" +version = "0.2.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +proc-macro2 = "1.0" +quote = "1.0" +syn = { version ="2.0", features = ["full", "extra-traits"] } + +[dev-dependencies] +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex", features = ["derive"] } + +[lib] +proc-macro = true diff --git a/crates/lex_derive/src/impl.rs b/crates/lex_derive/src/impl.rs new file mode 100644 index 0000000..88eb3c4 --- /dev/null +++ b/crates/lex_derive/src/impl.rs @@ -0,0 +1 @@ +pub mod lex; diff --git a/crates/core_derive/src/impl/tokenset.rs b/crates/lex_derive/src/impl/lex.rs similarity index 62% rename from crates/core_derive/src/impl/tokenset.rs rename to crates/lex_derive/src/impl/lex.rs index 1cd4df7..34d1d81 100644 --- a/crates/core_derive/src/impl/tokenset.rs +++ b/crates/lex_derive/src/impl/lex.rs @@ -6,7 +6,7 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { let data_enum = if let Data::Enum(data_enum) = ast.data { data_enum } else { - panic!("\"Tokenset\" proc-macro is only implemented for enum.") + panic!("\"LexSource\" proc-macro is only implemented for enum.") }; let parsed_variantes = data_enum @@ -16,37 +16,37 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { .collect::>(); let enum_name = &ast.ident; + let enum_matcher_table = parsed_variantes + .iter() + .map(|variant| variant.gen_ident_matcher()); let enum_ignored = parsed_variantes .iter() .find(|variant| variant.ignored) - .map(|variant| variant.regex.as_ref().unwrap().as_str()) + .map(|variant| variant.text.as_ref().unwrap().as_str()) .unwrap_or(""); let enum_variants = parsed_variantes .iter() .filter(|variant| !variant.ignored) .map(|variant| variant.gen_ident()); - let enum_regex_table = parsed_variantes - .iter() - .filter(|variant| !variant.ignored) - .map(|variant| variant.gen_ident_with_regex()); quote! 
{ - impl TokenSet<'_> for #enum_name { - fn ignore_str() -> &'static str { - #enum_ignored + impl TokenTag for #enum_name { + fn as_str<'a, 'b>(&'a self) -> &'b str { + match self { + #( #enum_matcher_table, )* + } } + } + + impl LexSource for #enum_name { + type Tag = Self; - fn into_iter() -> impl Iterator { - vec![ - #( #enum_variants, )* - ].into_iter() + fn ignore_token(&self) -> &'static str { + #enum_ignored } - fn into_regex_str(&self) -> &'static str { - match self { - #( #enum_regex_table, )* - _ => unimplemented!(), - } + fn iter(&self) -> impl Iterator { + vec![ #( #enum_variants, )* ].into_iter() } } } @@ -56,7 +56,7 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { struct VariantInfo<'a> { parent_ident: &'a Ident, self_ident: &'a Ident, - regex: Option, + text: Option, ignored: bool, } @@ -64,14 +64,14 @@ impl<'a> VariantInfo<'a> { fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> { let self_ident = &variant.ident; - let mut regex = None; + let mut text = None; let mut ignored = false; for attr in &variant.attrs { let _ = attr.parse_nested_meta(|meta| { - // #[...(regex = "...")] - if meta.path.is_ident("regex") { - let raw_regex = meta.value()?.parse::()?.value(); - regex = Some(format!("^{}", raw_regex)); + // #[...(text = "...")] + if meta.path.is_ident("text") { + let raw_text = meta.value()?.parse::()?.value(); + text = Some(format!("^{}", raw_text)); return Ok(()); } @@ -88,7 +88,7 @@ impl<'a> VariantInfo<'a> { VariantInfo { parent_ident, self_ident, - regex, + text, ignored, } } @@ -100,11 +100,11 @@ impl<'a> VariantInfo<'a> { quote! { #parent_ident :: #self_ident } } - fn gen_ident_with_regex(&self) -> TokenStream { + fn gen_ident_matcher(&self) -> TokenStream { let ident = self.gen_ident(); - match &self.regex { - Some(regex) => quote! { #ident => #regex }, - None => quote! { unimplemented!() }, + match &self.text { + Some(text) => quote! { #ident => #text }, + None => quote! 
{ #ident => unimplemented!() }, } } } diff --git a/crates/lex_derive/src/lib.rs b/crates/lex_derive/src/lib.rs new file mode 100644 index 0000000..9818d98 --- /dev/null +++ b/crates/lex_derive/src/lib.rs @@ -0,0 +1,9 @@ +mod r#impl; + +use syn::{parse_macro_input, DeriveInput}; + +#[proc_macro_derive(LexSource, attributes(token))] +pub fn derive_tokenset(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let ast = parse_macro_input!(input as DeriveInput); + r#impl::lex::proc_macro_impl(ast).into() +} diff --git a/crates/lex_derive/tests/simple.rs b/crates/lex_derive/tests/simple.rs new file mode 100644 index 0000000..8b8d448 --- /dev/null +++ b/crates/lex_derive/tests/simple.rs @@ -0,0 +1,27 @@ +use copager_cfg::token::TokenTag; +use copager_lex::LexSource; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum MyToken { + #[default] + #[token(text = r"\+")] + Abc, + #[token(text = r"\-")] + Def, + #[token(text = r"[1-9]+")] + Number, +} + + +#[test] +fn check_compile_simple() { + // LexSource + let mytoken = MyToken::default(); + assert!(mytoken.ignore_token().is_empty()); + assert_eq!(mytoken.iter().count(), 3); + + // TokenTag + assert_eq!(MyToken::Abc.as_str(), r"^\+"); + assert_eq!(MyToken::Def.as_str(), r"^\-"); + assert_eq!(MyToken::Number.as_str(), r"^[1-9]+"); +} diff --git a/crates/lex_derive/tests/with_ignored.rs b/crates/lex_derive/tests/with_ignored.rs new file mode 100644 index 0000000..09d1e5e --- /dev/null +++ b/crates/lex_derive/tests/with_ignored.rs @@ -0,0 +1,30 @@ +use copager_cfg::token::TokenTag; +use copager_lex::LexSource; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum MyToken { + #[default] + #[token(text = r"\+")] + Abc, + #[token(text = r"\-")] + Def, + #[token(text = r"[1-9]+")] + Number, + #[token(text = r"[ \t\n]+", ignored)] + _WhiteSpace, +} + + +#[test] +fn check_compile_with_ignored() { + // LexSource + let mytoken = MyToken::default(); + assert_eq!(mytoken.ignore_token(), r"^[ \t\n]+"); + assert_eq!(mytoken.iter().count(), 3); + + // TokenTag + assert_eq!(MyToken::Abc.as_str(), r"^\+"); + assert_eq!(MyToken::Def.as_str(), r"^\-"); + assert_eq!(MyToken::Number.as_str(), r"^[1-9]+"); + assert_eq!(MyToken::_WhiteSpace.as_str(), r"^[ \t\n]+"); +} diff --git a/crates/lex_regex/Cargo.toml b/crates/lex_regex/Cargo.toml new file mode 100644 index 0000000..5e07b94 --- /dev/null +++ b/crates/lex_regex/Cargo.toml @@ -0,0 +1,20 @@ +cargo-features = ["edition2024"] + +[package] +name = "copager_lex_regex" +version = "0.2.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +regex = "1.10.4" +regex-macro = "0.2.0" +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_utils = { path = "../utils" } + +[dev-dependencies] +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex", features = ["derive"] } +copager_lex_regex = { path = "." 
} diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs new file mode 100644 index 0000000..2e22254 --- /dev/null +++ b/crates/lex_regex/src/lib.rs @@ -0,0 +1,68 @@ +#![feature(gen_blocks)] + +use std::rc::Rc; + +use regex::{Regex, RegexSet}; + +use copager_cfg::token::{TokenTag, Token}; +use copager_lex::{LexSource, LexDriver}; + +#[derive(Debug)] +pub struct RegexLexer { + regex_istr: Rc, + regex_set: Rc, + regex_map: Rc>, +} + +impl LexDriver for RegexLexer { + fn try_from(source: S) -> anyhow::Result { + let regex_istr = Regex::new(source.ignore_token())?; + let regex_set = source.iter() + .map(|token| token.as_str()) + .collect::>(); + let regex_set = RegexSet::new(regex_set)?; + let regex_map = source.iter() + .map(|token| Ok((Regex::new(token.as_str())?, token))) + .collect::>>()?; + + Ok(RegexLexer { + regex_istr: Rc::new(regex_istr), + regex_set: Rc::new(regex_set), + regex_map: Rc::new(regex_map), + }) + } + + gen fn run<'input>(&self, input: &'input str) -> Token<'input, S::Tag> { + let mut pos = 0; + loop { + // Skip Spaces + let remain = match self.regex_istr.find(&input[pos..]) { + Some(acc_s) => { + pos += acc_s.len(); + &input[pos..] + } + None => &input[pos..] + }; + + // Find the token + let mut matches = self + .regex_set + .matches(remain) + .into_iter() + .map(|idx| &self.regex_map[idx]) + .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str())) + .collect::>(); + matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); + + // Update pos + let (token, acc_s) = match matches.first() { + Some(a) => a, + None => return, + }; + let range = (pos, pos + acc_s.len()); + pos += acc_s.len(); + + yield Token::new(*token, &input, range); + } + } +} diff --git a/crates/lex_regex/tests/simple.rs b/crates/lex_regex/tests/simple.rs new file mode 100644 index 0000000..6a15694 --- /dev/null +++ b/crates/lex_regex/tests/simple.rs @@ -0,0 +1,60 @@ +use copager_cfg::token::{TokenTag, Token}; +use copager_lex::{LexSource, LexDriver}; +use copager_lex_regex::RegexLexer; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +type MyLexer = RegexLexer; + +#[test] +fn simple_success() { + let source = ExprToken::default(); + let lexer = >::try_from(source).unwrap(); + let mut lexer = lexer.run("1 + 2 * 3"); + assert_eq_token(lexer.next(), "1"); + assert_eq_token(lexer.next(), "+"); + assert_eq_token(lexer.next(), "2"); + assert_eq_token(lexer.next(), "*"); + assert_eq_token(lexer.next(), "3"); + assert!(lexer.next().is_none()); +} + +#[test] +#[should_panic] +fn simple_failed() { + let source = ExprToken::default(); + let lexer = >::try_from(source).unwrap(); + let mut lexer = lexer.run("1 + 2 * stop 3"); + assert_eq_token(lexer.next(), "1"); + assert_eq_token(lexer.next(), "+"); + assert_eq_token(lexer.next(), "2"); + assert_eq_token(lexer.next(), "*"); + assert_eq_token(lexer.next(), "3"); + assert!(lexer.next().is_none()); +} + +fn assert_eq_token(token: Option>, s: &str) { + match token { + Some(token) => assert_eq!(token.as_str(), s), + None => panic!("unexpected eof"), + } +} diff --git a/crates/parse/Cargo.toml b/crates/parse/Cargo.toml new file mode 100644 index 0000000..7863532 --- /dev/null +++ 
b/crates/parse/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "copager_parse" +version = "0.2.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse_derive = { path = "../parse_derive", optional = true } +copager_utils = { path = "../utils" } + +[features] +default = [] +derive = ["copager_parse_derive"] diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs new file mode 100644 index 0000000..760f467 --- /dev/null +++ b/crates/parse/src/lib.rs @@ -0,0 +1,55 @@ +use copager_cfg::token::{TokenTag, Token}; +use copager_cfg::rule::{RuleTag, RuleSet}; +use copager_lex::LexSource; +#[cfg(feature = "derive")] +pub use copager_parse_derive::ParseSource; + +pub trait ParseSource { + type Tag: RuleTag; + + fn iter(&self) -> impl Iterator; + + fn into_ruleset(&self) -> RuleSet { + let set_id_for_all = |(id, tag): (usize, Self::Tag)| { + tag.as_rules() + .into_iter() + .map(move |rule| { + let mut rule = rule.clone(); + rule.id = id; + rule + }) + }; + self.iter() + .enumerate() + .flat_map(set_id_for_all) + .collect::>() + } +} + +pub trait ParseDriver +where + Self: Sized, + Sl: LexSource, + Sp: ParseSource, +{ + fn try_from(source: (Sl, Sp)) -> anyhow::Result; + fn run<'input, Il>(&self, lexer: Il) -> impl Iterator> + where + Il: Iterator>; +} + +pub enum ParseEvent<'input, T, R> +where + T: TokenTag, + R: RuleTag, +{ + // Parsing Event + Read(Token<'input, T>), + Parse { + rule: R, + len: usize, + }, + + // Control + Err(anyhow::Error), +} diff --git a/crates/parse_derive/Cargo.toml b/crates/parse_derive/Cargo.toml new file mode 100644 index 0000000..dc2fd69 --- /dev/null +++ b/crates/parse_derive/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "copager_parse_derive" +version = "0.2.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +proc-macro2 = "1.0" +quote = "1.0" +syn = { version ="2.0", features = ["full", "extra-traits"] } + +[dev-dependencies] +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex", features = ["derive"] } +copager_parse = { path = "../parse", features = ["derive"] } + +[lib] +proc-macro = true diff --git a/crates/parse_derive/src/impl.rs b/crates/parse_derive/src/impl.rs new file mode 100644 index 0000000..90d8760 --- /dev/null +++ b/crates/parse_derive/src/impl.rs @@ -0,0 +1 @@ +pub mod rule; diff --git a/crates/parse_derive/src/impl/rule.rs b/crates/parse_derive/src/impl/rule.rs new file mode 100644 index 0000000..5031024 --- /dev/null +++ b/crates/parse_derive/src/impl/rule.rs @@ -0,0 +1,117 @@ +use proc_macro2::TokenStream; +use quote::quote; +use syn::{Data, DeriveInput, Variant, Ident, LitStr}; + +pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { + let data_enum = if let Data::Enum(data_enum) = ast.data { + data_enum + } else { + panic!("\"ParseResource\" proc-macro is only implemented for enum.") + }; + + let parsed_variantes = data_enum + .variants + .iter() + .map(|variant| VariantInfo::parse(&ast.ident, variant)) + .collect::>(); + + let enum_name = &ast.ident; + let enum_matcher_table_i2r = parsed_variantes + .iter() + .map(|variant| variant.gen_matcher_ident_to_rule()); + let enum_assoc_type = format!("{}", enum_name) + .replace("Rule", "Token") + .parse::() + .unwrap(); + let enum_variants = parsed_variantes + .iter() + .map(|variant| variant.gen_ident()); + + quote! 
{ + impl RuleTag<#enum_assoc_type> for #enum_name { + fn as_rules(&self) -> Vec> { + match self { + #( #enum_matcher_table_i2r, )* + } + } + } + + impl ParseSource<#enum_assoc_type> for #enum_name { + type Tag = Self; + + fn iter(&self) -> impl Iterator { + vec![ #( #enum_variants, )* ].into_iter() + } + } + } +} + +struct VariantInfo<'a> { + parent_ident: &'a Ident, + self_ident: &'a Ident, + rules: Vec, +} + +impl<'a> VariantInfo<'a> { + fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> { + let self_ident = &variant.ident; + let token_ident = format!("{}", parent_ident) + .replace("Rule", "Token") + .parse::() + .unwrap(); + + let mut rules = vec![]; + for attr in &variant.attrs { + if attr.path().is_ident("rule") { + let attr = attr.parse_args::().unwrap().value(); + rules.push(parse_rule(&token_ident, &attr)); + } + } + + VariantInfo { + parent_ident, + self_ident, + rules, + } + } + + fn gen_ident(&self) -> TokenStream { + let parent_ident = self.parent_ident; + let self_ident = self.self_ident; + + quote! { #parent_ident :: #self_ident } + } + + fn gen_matcher_ident_to_rule(&self) -> TokenStream { + let ident = self.gen_ident(); + if self.rules.is_empty() { + quote! { #ident => unimplemented!() } + } else { + let rules = &self.rules; + quote! { #ident => vec![#(#rules),*] } + } + } +} + +fn parse_rule(token: &TokenStream, input: &str) -> TokenStream { + let mut splitted = input.split("::="); + + let lhs = splitted.next().unwrap().trim(); + let lhs = &lhs[1..lhs.len() - 1]; + let lhs = quote! { RuleElem::new_nonterm(#lhs) }; + + let rhs = splitted.collect::() + .split_whitespace() + .map(|elem| { + if elem.starts_with('<') { + let elem = &elem[1..elem.len() - 1]; + quote! { RuleElem::new_nonterm(#elem) } + } else { + let ident = elem.parse::().unwrap(); + quote! { RuleElem::new_term(#token::#ident) } + } + }) + .collect::>(); + + quote! 
{ Rule::from((#lhs, vec![ #( #rhs, )* ])) } +} diff --git a/crates/parse_derive/src/lib.rs b/crates/parse_derive/src/lib.rs new file mode 100644 index 0000000..864b4fc --- /dev/null +++ b/crates/parse_derive/src/lib.rs @@ -0,0 +1,9 @@ +mod r#impl; + +use syn::{parse_macro_input, DeriveInput}; + +#[proc_macro_derive(ParseSource, attributes(rule))] +pub fn derive_parse_source(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let ast = parse_macro_input!(input as DeriveInput); + r#impl::rule::proc_macro_impl(ast).into() +} diff --git a/crates/parse_derive/tests/simple.rs b/crates/parse_derive/tests/simple.rs new file mode 100644 index 0000000..4864d3b --- /dev/null +++ b/crates/parse_derive/tests/simple.rs @@ -0,0 +1,40 @@ +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_cfg::token::TokenTag; +use copager_lex::LexSource; +use copager_parse::ParseSource; + +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum MyToken { + #[token(text = r"\+")] + Plus, + #[token(text = r"\-")] + Minus, + #[token(text = r"[1-9]+")] + Number, +} + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum MyRule { + #[default] + #[rule(" ::= Plus Number")] + #[rule(" ::= Minus Number")] + #[rule(" ::= Number")] + Expr, +} + +#[test] +fn check_compile_simple() { + // ParseSource + let myrule = MyRule::default(); + assert_eq!(myrule.iter().count(), 1); + + // RuleTag + let rules = MyRule::Expr.as_rules(); + assert_eq!(rules.len(), 3); + assert_eq!(rules[0].lhs, RuleElem::new_nonterm("expr")); + assert_eq!(rules[0].rhs, vec![RuleElem::new_nonterm("expr"), RuleElem::new_term(MyToken::Plus), RuleElem::new_term(MyToken::Number)]); + assert_eq!(rules[1].lhs, RuleElem::new_nonterm("expr")); + assert_eq!(rules[1].rhs, vec![RuleElem::new_nonterm("expr"), RuleElem::new_term(MyToken::Minus), RuleElem::new_term(MyToken::Number)]); + assert_eq!(rules[2].lhs, RuleElem::new_nonterm("expr")); + assert_eq!(rules[2].rhs, vec![RuleElem::new_term(MyToken::Number)]); +} diff --git a/crates/parse_lr1/Cargo.toml b/crates/parse_lr1/Cargo.toml new file mode 100644 index 0000000..3d628d4 --- /dev/null +++ b/crates/parse_lr1/Cargo.toml @@ -0,0 +1,23 @@ +cargo-features = ["edition2024"] + +[package] +name = "copager_parse_lr1" +version = "0.2.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +serde = { workspace = true } +itertools = "0.12.1" +copager_core = { path = "../core" } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } +copager_utils = { path = "../utils" } + +[dev-dependencies] +copager_lex = { path = "../lex", features = ["derive"] } +copager_lex_regex = { path = "../lex_regex" } +copager_parse = { path = "../parse", features = ["derive"] } +copager_parse_lr1 = { path = "../parse_lr1" } diff --git a/crates/algorithm_lr1/src/builder.rs b/crates/parse_lr1/src/builder.rs similarity index 68% rename from crates/algorithm_lr1/src/builder.rs rename to crates/parse_lr1/src/builder.rs index 488b96f..5978ed8 100644 --- a/crates/algorithm_lr1/src/builder.rs +++ b/crates/parse_lr1/src/builder.rs @@ -1,48 +1,49 @@ use std::collections::{HashMap, HashSet}; use std::hash::Hash; -use std::marker::PhantomData; -use serde::{Serialize, Deserialize}; use itertools::Itertools; +use serde::{Serialize, Deserialize}; -use pgen_core::cfg::{TokenSet, Syntax, Rule, RuleElem, RuleSet}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleSet}; +use 
copager_lex::LexSource; +use copager_parse::ParseSource; #[derive(Debug, Serialize, Deserialize)] -pub(super) enum LRAction { +pub enum LRAction { Shift(usize), - Reduce(S, usize, usize), // syntax, goto_id, elems_cnt + Reduce(R, usize, usize), // tag, goto_id, elems_cnt Accept, None, } #[derive(Debug, Serialize, Deserialize)] -pub(super) struct LR1Configure<'a, T, S> +pub struct LR1Configure where - T: TokenSet<'a>, - S: Syntax<'a, TokenSet = T>, + Sl: LexSource, + Sp: ParseSource, { - // LR Tables - pub action_table: Vec>>, - pub eof_action_table: Vec>, + #[serde(bound( + serialize = "Sl::Tag: Serialize, Sp::Tag: Serialize", + deserialize = "Sl::Tag: Deserialize<'de>, Sp::Tag: Deserialize<'de>", + ))] + pub action_table: Vec>>, + pub eof_action_table: Vec>, pub goto_table: Vec>, - - // PhantomData - tokenset: PhantomData<&'a T>, } -impl<'a, T, S> LR1Configure<'a, T, S> +impl LR1Configure where - T: TokenSet<'a>, - S: Syntax<'a, TokenSet = T>, + Sl: LexSource, + Sp: ParseSource, { - pub fn setup() -> anyhow::Result { + pub fn new(source_l: &Sl, source_p: &Sp) -> anyhow::Result { // 1. Pre-process - let rules = S::into_iter().collect::>(); - let ruleset = S::into_ruleset(); + let ruleset = source_p.into_ruleset(); let first_set = ruleset.first_set(); // 2. Generate dummy nonterm - let top_dummy: Rule = Rule::from(( + let top_dummy: Rule = Rule::from(( RuleElem::new_nonterm("__top_dummy"), vec![RuleElem::new_nonterm(&ruleset.top)], )); @@ -54,7 +55,7 @@ where let lr_items = lr_items.expand_closure(&ruleset, &first_set); // 3. Generate a DFA - let dfa = LRItemDFA::gen(lr_items, &ruleset, &first_set); + let dfa = LRItemDFA::r#gen(lr_items, &ruleset, &first_set); // 4. Initialize tables let mut idx = 0; @@ -68,20 +69,21 @@ where } } - let mut action_table: Vec>> = Vec::with_capacity(dfa.0.len()); - let mut eof_action_table: Vec> = Vec::with_capacity(dfa.0.len()); + let mut action_table: Vec>> = Vec::with_capacity(dfa.0.len()); + let mut eof_action_table: Vec> = Vec::with_capacity(dfa.0.len()); let mut goto_table: Vec> = Vec::with_capacity(dfa.0.len()); for _ in 0..dfa.0.len() { action_table.push(HashMap::from_iter( - T::into_iter() + source_l.iter() .map(|token| (token, LRAction::None)) - .collect::)>>(), + .collect::)>>(), )); eof_action_table.push(LRAction::None); goto_table.push(vec![0; nonterm_table.keys().len()]); } // 5. 
Setup tables + let rule_tags = source_p.iter().collect::>(); for lritem_set in &dfa.0 { for (token, next) in &lritem_set.next { match &token { @@ -92,7 +94,7 @@ where } RuleElem::Term(t) => { let id = lritem_set.id as usize; - let label = action_table[id].get_mut(&t.0).unwrap(); + let label = action_table[id].get_mut(t).unwrap(); *label = LRAction::Shift(*next as usize); } _ => {} @@ -107,9 +109,9 @@ where for la_token in &item.la_tokens { if let RuleElem::Term(t) = la_token { let id = lritem_set.id as usize; - let label = action_table[id].get_mut(&t.0).unwrap(); + let label = action_table[id].get_mut(t).unwrap(); *label = LRAction::Reduce( - rules[item.rule.id as usize], + rule_tags[item.rule.id as usize], *nonterm_table.get(lhs).unwrap(), item.rule.rhs.len(), ); @@ -120,7 +122,7 @@ where LRAction::Accept } else { LRAction::Reduce( - rules[item.rule.id as usize], + rule_tags[item.rule.id as usize], *nonterm_table.get(lhs).unwrap(), item.rule.rhs.len(), ) @@ -135,23 +137,22 @@ where action_table, eof_action_table, goto_table, - tokenset: PhantomData, }) } } #[derive(Debug)] -struct LRItemDFA<'a, 'b, T: TokenSet<'a>> ( - Vec> +struct LRItemDFA<'a, T: TokenTag> ( + Vec> ); -impl<'a, 'b, T: TokenSet<'a>> LRItemDFA<'a, 'b, T> { - fn gen( - init_set: LRItemSet<'a, 'b, T>, - ruleset: &'b RuleSet<'a, T>, - first_set: &HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>>, - ) -> LRItemDFA<'a, 'b, T> { - let issue_id = |old_sets: &Vec>, set: &LRItemSet<'a, 'b, T>| { +impl<'a, T: TokenTag> LRItemDFA<'a, T> { + fn r#gen( + init_set: LRItemSet<'a, T>, + ruleset: &'a RuleSet, + first_set: &HashMap<&'a RuleElem, Vec<&'a RuleElem>>, + ) -> LRItemDFA<'a, T> { + let issue_id = |old_sets: &Vec>, set: &LRItemSet<'a, T>| { if let Some(ex_set) = old_sets.iter().find(|&set0| set0.strict_eq(set)) { Err(ex_set.id) } else { @@ -187,29 +188,27 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemDFA<'a, 'b, T> { } } -#[derive(Clone, Debug)] -struct LRItemSet<'a, 'b, T: TokenSet<'a>> { +#[derive(Clone, Debug, Eq)] +struct LRItemSet<'a, T: TokenTag> { id: i32, - next: HashMap<&'b RuleElem<'a, T>, i32>, - lr_items: HashSet>, + next: HashMap<&'a RuleElem, i32>, + lr_items: HashSet>, } -impl<'a, 'b, T: TokenSet<'a>> PartialEq for LRItemSet<'a, 'b, T> { - fn eq(&self, other: &LRItemSet<'a, 'b, T>) -> bool { +impl<'a, T: TokenTag> PartialEq for LRItemSet<'a, T> { + fn eq(&self, other: &LRItemSet<'a, T>) -> bool { self.lr_items == other.lr_items } } -impl<'a, 'b, T: TokenSet<'a>> PartialEq>> for LRItemSet<'a, 'b, T> { - fn eq(&self, other: &HashSet>) -> bool { +impl<'a, T: TokenTag> PartialEq>> for LRItemSet<'a, T> { + fn eq(&self, other: &HashSet>) -> bool { &self.lr_items == other } } -impl<'a, 'b, T: TokenSet<'a>> Eq for LRItemSet<'a, 'b, T> {} - -impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { - fn new(id: i32, lr_items: HashSet>) -> Self { +impl<'a, T: TokenTag> LRItemSet<'a, T> { + fn new(id: i32, lr_items: HashSet>) -> Self { LRItemSet { id, next: HashMap::new(), @@ -226,23 +225,23 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { .all(|item| other.lr_items.iter().any(|item_b| item_b.strict_eq(item))) } - fn expand_closure<'c>( + fn expand_closure<'b>( mut self, - ruleset: &'b RuleSet<'a, T>, - first_set: &'c HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>>, - ) -> LRItemSet<'a, 'b, T> { + ruleset: &'a RuleSet, + first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, + ) -> LRItemSet<'a, T> { let mut lr_items = self.lr_items.clone(); let mut lr_items_fetched = self.lr_items; loop { - let new_items: Vec> = 
lr_items_fetched + let new_items: Vec> = lr_items_fetched .iter() .flat_map(|item| item.expand_closure(ruleset, first_set)) .collect(); - let new_items = LRItem::<'_, '_, _>::unify_all(new_items); + let new_items = LRItem::<'_, _>::unify_all(new_items); let new_items = HashSet::from_iter(new_items); let bef_len = lr_items.len(); - lr_items = LRItem::<'_, '_, _>::unity_set(lr_items, new_items.clone()); + lr_items = LRItem::<'_, _>::unity_set(lr_items, new_items.clone()); let af_len = lr_items.len(); if bef_len == af_len { break; @@ -254,18 +253,18 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { self } - fn gen_next_sets<'c>( + fn gen_next_sets<'b>( &self, - ruleset: &'b RuleSet<'a, T>, - first_set: &'c HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>>, - ) -> HashMap<&'b RuleElem<'a, T>, LRItemSet<'a, 'b, T>> { - let new_items: Vec<(&'b RuleElem<'a, T>, LRItem<'a, 'b, T>)> = self + ruleset: &'a RuleSet, + first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, + ) -> HashMap<&'a RuleElem, LRItemSet<'a, T>> { + let new_items: Vec<(&'a RuleElem, LRItem<'a, T>)> = self .lr_items .iter() .filter_map(|lr_item| lr_item.next_dot()) .collect(); - let mut new_sets: HashMap<&RuleElem, HashSet>> = HashMap::new(); + let mut new_sets: HashMap<&RuleElem, HashSet>> = HashMap::new(); for (bef_token, lr_item) in new_items { if new_sets.get(&bef_token).is_none() { new_sets.insert(bef_token, HashSet::new()); @@ -273,7 +272,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { new_sets.get_mut(&bef_token).unwrap().insert(lr_item); } - let mut new_sets_expanded: HashMap<&'b RuleElem<'a, T>, LRItemSet<'_, '_, _>> = HashMap::new(); + let mut new_sets_expanded: HashMap<&'a RuleElem, LRItemSet<'_, _>> = HashMap::new(); for (ktoken, new_set) in new_sets { let new_set = LRItemSet::new(0, new_set); let new_set = new_set.expand_closure(ruleset, first_set); @@ -284,30 +283,28 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { } } -#[derive(Clone, Debug)] -struct LRItem<'a, 'b, T: TokenSet<'a>> { - rule: &'b Rule<'a, T>, +#[derive(Clone, Debug, Eq)] +struct LRItem<'a, T: TokenTag> { + rule: &'a Rule, dot_pos: usize, - la_tokens: HashSet<&'b RuleElem<'a, T>>, + la_tokens: HashSet<&'a RuleElem>, } -impl<'a, 'b, T: TokenSet<'a>> Hash for LRItem<'a, 'b, T> { +impl<'a, T: TokenTag> Hash for LRItem<'a, T> { fn hash(&self, state: &mut H) { self.rule.hash(state); self.dot_pos.hash(state); } } -impl<'a, 'b, T: TokenSet<'a>> PartialEq for LRItem<'a, 'b, T> { +impl<'a, T: TokenTag> PartialEq for LRItem<'a, T> { fn eq(&self, other: &Self) -> bool { self.rule == other.rule && self.dot_pos == other.dot_pos } } -impl<'a, 'b, T: TokenSet<'a>> Eq for LRItem<'a, 'b, T> {} - -impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { - fn new(rule: &'b Rule<'a, T>, la_tokens: HashSet<&'b RuleElem<'a, T>>) -> LRItem<'a, 'b, T> { +impl<'a, T: TokenTag> LRItem<'a, T> { + fn new(rule: &'a Rule, la_tokens: HashSet<&'a RuleElem>) -> LRItem<'a, T> { LRItem { rule, dot_pos: 0, @@ -321,11 +318,11 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { && self.la_tokens == other.la_tokens } - fn expand_closure<'c>( + fn expand_closure<'b>( &self, - ruleset: &'b RuleSet<'a, T>, - first_set: &'c HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>>, - ) -> HashSet> { + ruleset: &'a RuleSet, + first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, + ) -> HashSet> { let af_la_tokens = if self.dot_pos + 1 < self.rule.rhs.len() { HashSet::from_iter( first_set @@ -343,7 +340,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { ruleset 
.find_rule(&self.rule.rhs[self.dot_pos]) .into_iter() - .map(|rule| LRItem::<'_, '_, _>::new(rule, af_la_tokens.clone())) + .map(|rule| LRItem::<'_, _>::new(rule, af_la_tokens.clone())) .collect() } else { HashSet::new() @@ -351,7 +348,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { } #[allow(clippy::int_plus_one)] - fn next_dot(&self) -> Option<(&'b RuleElem<'a, T>, LRItem<'a, 'b, T>)> { + fn next_dot(&self) -> Option<(&'a RuleElem, LRItem<'a, T>)> { if self.dot_pos + 1 <= self.rule.rhs.len() { let bef_token = &self.rule.rhs[self.dot_pos]; let item = LRItem { @@ -365,7 +362,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { } } - fn unify(&mut self, other: LRItem<'a, 'b, T>) { + fn unify(&mut self, other: LRItem<'a, T>) { if self != &other { return; } @@ -376,7 +373,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { }); } - fn unify_all(mut items: Vec>) -> Vec> { + fn unify_all(mut items: Vec>) -> Vec> { for idx in (0..items.len()).permutations(2) { let (a_idx, b_idx) = (idx[0], idx[1]); let tmp = items[b_idx].clone(); @@ -386,9 +383,9 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { } fn unity_set( - items_a: HashSet>, - items_b: HashSet>, - ) -> HashSet> { + items_a: HashSet>, + items_b: HashSet>, + ) -> HashSet> { let mut items_a = Vec::from_iter(items_a); let items_b = Vec::from_iter(items_b); items_a.extend(items_b); diff --git a/crates/algorithm_lr1/src/error.rs b/crates/parse_lr1/src/error.rs similarity index 63% rename from crates/algorithm_lr1/src/error.rs rename to crates/parse_lr1/src/error.rs index 025eb7e..4cbb467 100644 --- a/crates/algorithm_lr1/src/error.rs +++ b/crates/parse_lr1/src/error.rs @@ -1,8 +1,7 @@ use thiserror::Error; -use pgen_core::error::ParseError as SuperParseError; -use pgen_core::cfg::TokenSet; -use pgen_core::lex::Token; +use copager_core::error::ParseError as SuperParseError; +use copager_cfg::token::{TokenTag, Token}; #[derive(Debug, Error)] pub enum ParseError { @@ -15,10 +14,7 @@ pub enum ParseError { } impl ParseError { - pub fn new_unexpected_token<'a, T>(expected: Token<'a, '_, T>) -> SuperParseError - where - T: TokenSet<'a>, - { + pub fn new_unexpected_token(expected: Token) -> SuperParseError { let err = ParseError::UnexpectedToken { actual: format!("{:?}", expected.kind), }; diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs new file mode 100644 index 0000000..0e1a754 --- /dev/null +++ b/crates/parse_lr1/src/lib.rs @@ -0,0 +1,98 @@ +#![feature(gen_blocks)] + +mod error; +mod builder; + +use std::collections::HashMap; + +use serde::{Serialize, Deserialize}; + +use copager_cfg::token::Token; +use copager_lex::LexSource; +use copager_parse::{ParseSource, ParseDriver, ParseEvent}; +use copager_utils::cache::Cacheable; + +use builder::{LR1Configure, LRAction}; +use error::ParseError; + +#[derive(Debug)] +pub struct LR1 +where + Sl: LexSource, + Sp: ParseSource, +{ + tables: LR1Configure, +} + +impl Cacheable<(Sl, Sp)> for LR1 +where + Sl: LexSource, + Sl::Tag: Serialize + for<'de> Deserialize<'de>, + Sp: ParseSource, + Sp::Tag: Serialize + for<'de> Deserialize<'de>, +{ + type Cache = LR1Configure; + + fn new((source_l, source_p): (Sl, Sp)) -> anyhow::Result { + Ok(LR1Configure::new(&source_l, &source_p)?) 
+ } + + fn restore(tables: Self::Cache) -> Self { + LR1 { tables } + } +} + +impl ParseDriver for LR1 +where + Sl: LexSource, + Sp: ParseSource, +{ + fn try_from((source_l, source_p): (Sl, Sp)) -> anyhow::Result { + let tables = LR1Configure::new(&source_l, &source_p)?; + Ok(LR1 { tables }) + } + + gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> + where + Il: Iterator>, + { + let mut stack = vec![0]; + loop { + let token = lexer.next(); + loop { + let top = stack[stack.len() - 1]; + let action = match token { + Some(token) => { + let local_action_table: &HashMap<_, _> = &self.tables.action_table[top]; + (local_action_table.get(&token.kind).unwrap(), Some(token)) + }, + None => (&self.tables.eof_action_table[top], None), + }; + match action { + (LRAction::Shift(new_state), Some(token)) => { + stack.push(*new_state); + yield ParseEvent::Read(token); + break; + } + (LRAction::Reduce(tag, goto, elems_cnt), _) => { + stack.truncate(stack.len() - elems_cnt); + stack.push(self.tables.goto_table[stack[stack.len() - 1]][*goto]); + yield ParseEvent::Parse { rule: *tag, len: *elems_cnt }; + } + (LRAction::Accept, _) => { + return; + } + (LRAction::None, Some(token)) => { + yield ParseEvent::Err(ParseError::new_unexpected_token(token).into()); + return; + } + (LRAction::None, None) => { + yield ParseEvent::Err(ParseError::UnexpectedEOF.into()); + return; + } + _ => unreachable!(), + } + } + } + } +} diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs new file mode 100644 index 0000000..1acd706 --- /dev/null +++ b/crates/parse_lr1/tests/simple.rs @@ -0,0 +1,105 @@ +use serde::{Serialize, Deserialize}; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::{LexSource, LexDriver}; +use copager_lex_regex::RegexLexer; +use copager_parse::{ParseSource, ParseDriver, ParseEvent}; +use copager_parse_lr1::LR1; + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + LexSource, Serialize, Deserialize +)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + ParseSource, Serialize, Deserialize +)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyLexer = RegexLexer; +type MyParser = LR1; + +const OK_INPUTS: [&str; 10] = [ + "10", + "10 + 20", + "10 - 20", + "10 * 20", + "10 / 20", + "10 + 20 * 30 - 40", + "(10)", + "((((10))))", + "10 * (20 - 30)", + "((10 + 20) * (30 / 40)) - 50", +]; + +const ERR_INPUTS: [&str; 7] = [ + "()", + "(10 -", + "10 +", + "*", + "10 20 + 30", + "10 + 20 * 30 / 40 (", + "(((10))", +]; + +#[test] +fn simple_success() { + for input in &OK_INPUTS { + assert!(parse(input), "{}", input); + } +} + +#[test] +fn simple_failure() { + for input in &ERR_INPUTS { + assert!(!parse(input), "{}", input); + } +} + +fn parse<'input>(input: &'input str) -> bool { + let source = ExprToken::default(); + let lexer = >::try_from(source).unwrap(); + + let source = (ExprToken::default(), ExprRule::default()); + let parser = 
>::try_from(source).unwrap(); + + let mut parse_itr = parser.run(lexer.run(input)); + let is_err = |state| matches!(state, ParseEvent::Err(_)); + let err_happened = parse_itr.any(is_err); + + !err_happened +} diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml new file mode 100644 index 0000000..3a9ffc7 --- /dev/null +++ b/crates/utils/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "copager_utils" +version = "0.1.1" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +serde = { workspace = true } diff --git a/crates/utils/src/cache.rs b/crates/utils/src/cache.rs new file mode 100644 index 0000000..11cc551 --- /dev/null +++ b/crates/utils/src/cache.rs @@ -0,0 +1,11 @@ +use serde::{Serialize, Deserialize}; + +pub trait Cacheable +where + Self: Sized, +{ + type Cache: Serialize + for<'de> Deserialize<'de>; + + fn new(from: F) -> anyhow::Result; + fn restore(cache: Self::Cache) -> Self; +} diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs new file mode 100644 index 0000000..a5c08fd --- /dev/null +++ b/crates/utils/src/lib.rs @@ -0,0 +1 @@ +pub mod cache; diff --git a/examples/expr.rs b/examples/expr.rs deleted file mode 100644 index 6f7b7a7..0000000 --- a/examples/expr.rs +++ /dev/null @@ -1,60 +0,0 @@ -use std::io::stdin; - -use parsergen::algorithm::LR1; -use parsergen::cfg::*; -use parsergen::error::ParseError; -use parsergen::Parser; - -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] -enum ExprTokenSet { - #[token(regex = r"\+")] - Plus, - #[token(regex = r"-")] - Minus, - #[token(regex = r"\*")] - Mul, - #[token(regex = r"/")] - Div, - #[token(regex = r"\(")] - BracketL, - #[token(regex = r"\)")] - BracketR, - #[token(regex = r"[1-9][0-9]*")] - Num, - #[token(regex = r"[ \t\n]+", ignored)] - _Whitespace, -} - -#[derive(Debug, Clone, Copy, Syntax)] -enum ExprSyntax { - #[rule(" ::= Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, -} - -type ExprParser<'a> = Parser::<'a, LR1<'a, ExprTokenSet, ExprSyntax>>; - -fn main() -> anyhow::Result<()> { - let mut input = String::new(); - stdin().read_line(&mut input)?; - - match ExprParser::new()?.parse(&input) { - Ok(sexp) => println!("Accepted : {}", sexp), - Err(e) => { - if let Some(e) = e.downcast_ref::() { - e.pretty_print(); - } - println!("Rejected : {}", e); - } - }; - - Ok(()) -} diff --git a/examples/oneshot/Cargo.toml b/examples/oneshot/Cargo.toml new file mode 100644 index 0000000..b38a4b8 --- /dev/null +++ b/examples/oneshot/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "example_oneshot" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager = { path = "../..", features = ["derive", "regexlex", "lr1", "sexp"] } diff --git a/examples/oneshot/src/main.rs b/examples/oneshot/src/main.rs new file mode 100644 index 0000000..8971e61 --- /dev/null +++ b/examples/oneshot/src/main.rs @@ -0,0 +1,62 @@ +use std::io::stdin; + +use copager::lex::{LexSource, RegexLexer}; +use copager::parse::{ParseSource, LR1}; +use copager::ir::SExp; +use copager::prelude::*; +use copager::{Grammar, Processor}; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] 
+ BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyGrammar = Grammar; +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyProcessor = Processor; + +fn main() -> anyhow::Result<()> { + let mut input = String::new(); + stdin().read_line(&mut input)?; + + let sexp = MyProcessor::new() + .build_lexer()? + .build_parser()? + .process::>(&input)?; + println!("Success : {}", sexp); + + Ok(()) +} diff --git a/examples/prebuild/Cargo.toml b/examples/prebuild/Cargo.toml new file mode 100644 index 0000000..c3ffa74 --- /dev/null +++ b/examples/prebuild/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "example_prebuild" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager = { path = "../..", features = ["prebuild", "sexp"] } +grammar = { package = "example_prebuild_grammar", path = "./grammar" } + +[build-dependencies] +serde = { workspace = true } +copager = { path = "../..", features = ["prebuild"] } +grammar = { package = "example_prebuild_grammar", path = "./grammar" } diff --git a/examples/prebuild/build.rs b/examples/prebuild/build.rs new file mode 100644 index 0000000..69c88be --- /dev/null +++ b/examples/prebuild/build.rs @@ -0,0 +1,8 @@ +use grammar::MyProcessor; + +#[copager::prebuild] +fn main() -> MyProcessor { + MyProcessor::new() + .prebuild_parser() + .unwrap() +} diff --git a/examples/prebuild/grammar/Cargo.toml b/examples/prebuild/grammar/Cargo.toml new file mode 100644 index 0000000..440c658 --- /dev/null +++ b/examples/prebuild/grammar/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "example_prebuild_grammar" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +serde = { workspace = true } +copager = { path = "../../..", features = ["derive", "regexlex", "lr1", "sexp"] } diff --git a/examples/prebuild/grammar/src/lib.rs b/examples/prebuild/grammar/src/lib.rs new file mode 100644 index 0000000..c1de489 --- /dev/null +++ b/examples/prebuild/grammar/src/lib.rs @@ -0,0 +1,54 @@ +use serde::{Deserialize, Serialize}; + +use copager::lex::{LexSource, RegexLexer}; +use copager::parse::{ParseSource, LR1}; +use copager::prelude::*; +use copager::{Grammar, Processor}; + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + LexSource, Serialize, Deserialize, +)] +pub enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + ParseSource, Serialize, Deserialize, +)] +pub enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +pub type MyGrammar = Grammar; +pub type MyLexer = RegexLexer; +pub type 
+pub type MyParser = LR1<ExprToken, ExprRule>;
+pub type MyProcessor = Processor<MyGrammar, MyLexer, MyParser>;
diff --git a/examples/prebuild/src/main.rs b/examples/prebuild/src/main.rs
new file mode 100644
index 0000000..ccb8ee7
--- /dev/null
+++ b/examples/prebuild/src/main.rs
@@ -0,0 +1,19 @@
+use std::io::stdin;
+
+use copager::ir::SExp;
+
+use grammar::MyProcessor;
+
+#[copager::load]
+fn main(processor: MyProcessor) -> anyhow::Result<()> {
+    let mut input = String::new();
+    stdin().read_line(&mut input)?;
+
+    let sexp = processor
+        .build_lexer()?
+        .build_parser_by_cache()
+        .process::<SExp<_, _>>(&input)?;
+    println!("Success : {}", sexp);
+
+    Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
index f7f0c81..5b802dc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,2 +1,36 @@
-pub use pgen_core::*;
-pub use pgen_algorithm as algorithm;
+pub use copager_core::*;
+pub use copager_cfg as cfg;
+
+#[cfg(feature = "prebuild")]
+pub use copager_core_macros::*;
+
+#[cfg(feature = "prebuild")]
+pub mod prebuild {
+    pub use serde_json::to_string as serialize;
+    pub use serde_json::from_str as deserialize;
+}
+
+pub mod lex {
+    pub use copager_lex::*;
+    #[cfg(feature = "regexlex")]
+    pub use copager_lex_regex::*;
+}
+
+pub mod parse {
+    pub use copager_parse::*;
+    #[cfg(feature = "lr1")]
+    pub use copager_parse_lr1::*;
+}
+
+pub mod ir {
+    pub use copager_ir::*;
+    #[cfg(feature = "void")]
+    pub use copager_ir_void::*;
+    #[cfg(feature = "sexp")]
+    pub use copager_ir_sexp::*;
+}
+
+pub mod prelude {
+    pub use copager_cfg::rule::{RuleTag, Rule, RuleElem};
+    pub use copager_cfg::token::TokenTag;
+}
diff --git a/tests/derive.rs b/tests/derive.rs
deleted file mode 100644
index 87d107f..0000000
--- a/tests/derive.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-use parsergen::cfg::*;
-
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)]
-enum TestTokenSet {
-    #[token(regex = r"\+")]
-    Plus,
-    #[token(regex = r"-")]
-    Minus,
-    #[token(regex = r"\*")]
-    Mul,
-    #[token(regex = r"/")]
-    Div,
-    #[token(regex = r"\(")]
-    BracketL,
-    #[token(regex = r"\)")]
-    BracketR,
-    #[token(regex = r"[1-9][0-9]*")]
-    Num,
-    #[token(regex = r"[ \t\n]+", ignored)]
-    _Whitespace,
-}
-
-#[derive(Debug, Clone, Copy, Syntax)]
-enum TestSyntax {
-    #[rule("<expr> ::= <expr> Plus <term>")]
-    #[rule("<expr> ::= <expr> Minus <term>")]
-    #[rule("<expr> ::= <term>")]
-    Expr,
-    #[rule("<term> ::= <term> Mul <num>")]
-    #[rule("<term> ::= <term> Div <num>")]
-    #[rule("<term> ::= <num>")]
-    Term,
-    #[rule("<num> ::= BracketL <expr> BracketR")]
-    #[rule("<num> ::= Num")]
-    Num,
-}
-
-#[test]
-fn check_compile() {
-    let _ = TestTokenSet::into_regex(&self::TestTokenSet::Plus);
-    let _ = TestSyntax::into_rules(&self::TestSyntax::Expr);
-}
diff --git a/tests/serde.rs b/tests/serde.rs
deleted file mode 100644
index ce2a274..0000000
--- a/tests/serde.rs
+++ /dev/null
@@ -1,51 +0,0 @@
-use serde::{Serialize, Deserialize};
-
-use parsergen::algorithm::LR1;
-use parsergen::cfg::*;
-use parsergen::Parser;
-
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, TokenSet)]
-enum TestTokenSet {
-    #[token(regex = r"\+")]
-    Plus,
-    #[token(regex = r"-")]
-    Minus,
-    #[token(regex = r"\*")]
-    Mul,
-    #[token(regex = r"/")]
-    Div,
-    #[token(regex = r"\(")]
-    BracketL,
-    #[token(regex = r"\)")]
-    BracketR,
-    #[token(regex = r"[1-9][0-9]*")]
-    Num,
-    #[token(regex = r"[ \t\n]+", ignored)]
-    _Whitespace,
-}
-
-#[derive(Debug, Clone, Copy, Serialize, Deserialize, Syntax)]
-enum TestSyntax {
-    #[rule("<expr> ::= <expr> Plus <term>")]
-    #[rule("<expr> ::= <expr> Minus <term>")]
-    #[rule("<expr> ::= <term>")]
-    Expr,
-    #[rule("<term> ::= <term> Mul <num>")]
-    #[rule("<term> ::= <term> Div <num>")]
-    #[rule("<term> ::= <num>")]
-    Term,
-    #[rule("<num> ::= BracketL <expr> BracketR")]
-    #[rule("<num> ::= Num")]
-    Num,
-}
-
-#[test]
-fn serde() {
-    type TestParser<'a> = Parser::<'a, LR1<'a, TestTokenSet, TestSyntax>>;
-
-    let parser = TestParser::new().unwrap();
-    let serialized = serde_json::to_string(&parser).unwrap();
-    let deserialized: TestParser = serde_json::from_str(&serialized).unwrap();
-
-    deserialized.parse("10 * (20 - 30)").unwrap();
-}