diff --git a/Cargo.lock b/Cargo.lock index dd06f09..2067388 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -32,7 +32,11 @@ dependencies = [ "copager_lex", "copager_lex_regex", "copager_parse", - "copager_parse_lr1", + "copager_parse_common", + "copager_parse_lr_common", + "copager_parse_lr_lr0", + "copager_parse_lr_lr1", + "copager_parse_lr_slr1", "example_lang_arithmetic", "example_lang_json", "example_lang_pl0", @@ -46,6 +50,7 @@ name = "copager_cfg" version = "0.2.0" dependencies = [ "anyhow", + "serde", "thiserror", ] @@ -61,7 +66,7 @@ dependencies = [ "copager_lex", "copager_lex_regex", "copager_parse", - "copager_parse_lr1", + "copager_parse_lr_lr1", "copager_utils", "serde", "serde_cbor", @@ -96,12 +101,13 @@ version = "0.2.0" dependencies = [ "anyhow", "copager_cfg", + "copager_core", "copager_ir", "copager_ir_sexp", "copager_lex", "copager_lex_regex", "copager_parse", - "copager_parse_lr1", + "copager_parse_lr_lr1", "thiserror", ] @@ -167,6 +173,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "copager_parse_common" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "copager_parse", + "thiserror", +] + [[package]] name = "copager_parse_derive" version = "0.2.0" @@ -182,36 +199,79 @@ dependencies = [ ] [[package]] -name = "copager_parse_lr1" +name = "copager_parse_lr_common" version = "0.2.0" dependencies = [ "anyhow", "copager_cfg", "copager_core", + "copager_parse", + "copager_parse_common", + "serde", + "thiserror", +] + +[[package]] +name = "copager_parse_lr_lr0" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_core", + "copager_ir_void", "copager_lex", "copager_lex_regex", "copager_parse", - "copager_parse_lr1", + "copager_parse_lr_common", "copager_utils", - "itertools", "serde", "thiserror", ] [[package]] -name = "copager_utils" -version = "0.1.1" +name = "copager_parse_lr_lr1" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_core", + "copager_ir_void", + "copager_lex", + "copager_lex_regex", + "copager_parse", + "copager_parse_common", + "copager_parse_lr_common", + "copager_utils", + "serde", + "thiserror", +] + +[[package]] +name = "copager_parse_lr_slr1" +version = "0.2.0" dependencies = [ "anyhow", + "copager_cfg", + "copager_core", + "copager_ir_void", + "copager_lex", + "copager_lex_regex", + "copager_parse", + "copager_parse_common", + "copager_parse_lr_common", + "copager_utils", "serde", "thiserror", ] [[package]] -name = "either" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +name = "copager_utils" +version = "0.1.1" +dependencies = [ + "anyhow", + "serde", + "thiserror", +] [[package]] name = "example_build_oneshot" @@ -285,15 +345,6 @@ version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itoa" version = "1.0.11" diff --git a/Cargo.toml b/Cargo.toml index 928bba1..16a0d11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,11 @@ 
copager_cfg = { path = "./crates/cfg" }
copager_lex = { path = "./crates/lex", optional = true }
copager_lex_regex = { path = "./crates/lex_regex", optional = true }
copager_parse = { path = "./crates/parse", optional = true }
-copager_parse_lr1 = { path = "./crates/parse_lr1", optional = true }
+copager_parse_common = { path = "./crates/parse_common", optional = true }
+copager_parse_lr_common = { path = "./crates/parse_lr_common", optional = true }
+copager_parse_lr_lr0 = { path = "./crates/parse_lr_lr0", optional = true }
+copager_parse_lr_lr1 = { path = "./crates/parse_lr_lr1", optional = true }
+copager_parse_lr_slr1 = { path = "./crates/parse_lr_slr1", optional = true }
 copager_ir = { path = "./crates/ir" }
 copager_ir_void = { path = "./crates/ir_void", optional = true }
 copager_ir_sexp = { path = "./crates/ir_sexp", optional = true }
@@ -29,17 +33,27 @@ example_lang_pl0 = { path = "./examples/lang_pl0" }
 example_lang_xml = { path = "./examples/lang_xml" }
 
 [features]
+# all
+all = [
+    "prebuild", "derive", "dev", # common
+    "regexlex",                  # lex
+    "lr0", "lr1", "slr1",        # parse
+    "void", "sexp"               # ir
+]
+
 # common
 default = ["dep:copager_lex", "dep:copager_parse"]
-all = ["prebuild", "derive", "regexlex", "lr1", "void", "sexp"]
 prebuild = ["dep:serde_json"]
 derive = ["copager_lex/derive", "copager_parse/derive"]
+dev = ["dep:copager_parse_common", "dep:copager_parse_lr_common"]
 
 # lex
 regexlex = ["dep:copager_lex_regex"]
 
 # parse
-lr1 = ["dep:copager_parse_lr1"]
+lr0 = ["dep:copager_parse_lr_lr0"]
+lr1 = ["dep:copager_parse_lr_lr1"]
+slr1 = ["dep:copager_parse_lr_slr1"]
 
 # ir
 void = ["dep:copager_ir_void"]
@@ -56,8 +70,12 @@ members = [
     "./crates/lex_derive",
     "./crates/lex_regex",
     "./crates/parse",
+    "./crates/parse_common",
     "./crates/parse_derive",
-    "./crates/parse_lr1",
+    "./crates/parse_lr_common",
+    "./crates/parse_lr_lr0",
+    "./crates/parse_lr_lr1",
+    "./crates/parse_lr_slr1",
     "./crates/ir",
     "./crates/ir_void",
     "./crates/ir_sexp",
diff --git a/README.md b/README.md
index 2a6ade1..818d305 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@ Rust製パーサジェネレータ
 - `all`
 - `derive`
 - `prebuild`
+- `dev`
 
 ### Lex
@@ -16,6 +17,8 @@ Rust製パーサジェネレータ
 
 ### Parse
 
+- `lr0` : [crates/parse_lr_lr0](crates/parse_lr_lr0)
-- `lr1` : [crates/parse_lr1](crates/parse_lr1)
+- `lr1` : [crates/parse_lr_lr1](crates/parse_lr_lr1)
+- `slr1` : [crates/parse_lr_slr1](crates/parse_lr_slr1)
 
 ### IR
diff --git a/crates/cfg/Cargo.toml b/crates/cfg/Cargo.toml
index 220fec0..0514617 100644
--- a/crates/cfg/Cargo.toml
+++ b/crates/cfg/Cargo.toml
@@ -6,3 +6,4 @@ edition = "2021"
 [dependencies]
 anyhow = { workspace = true }
 thiserror = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs
index 2f1bd24..156bef0 100644
--- a/crates/cfg/src/rule.rs
+++ b/crates/cfg/src/rule.rs
@@ -1,30 +1,89 @@
-use std::collections::HashMap;
-use std::fmt::Debug;
+use std::collections::HashSet;
+use std::fmt::{Display, Debug};
 use std::hash::Hash;
 
+use serde::{Serialize, Deserialize};
+
 use crate::token::TokenTag;
 
 pub trait RuleTag<T: TokenTag>
 where
     Self: Debug + Copy + Clone + Hash + Eq,
 {
-    fn as_rules(&self) -> Vec<Rule<T>>;
+    fn as_rules(&self) -> Vec<Rule<T, Self>>;
 }
 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct Rule<T: TokenTag> {
+#[derive(Clone, Eq, Serialize, Deserialize)]
+pub struct Rule<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    #[serde(bound(
+        serialize = "T: Serialize, R: Serialize",
+        deserialize = "T: Deserialize<'de>, R: Deserialize<'de>",
+    ))]
     pub id: usize,
+    pub tag: Option<R>,
     pub lhs: RuleElem<T>,
     pub rhs: Vec<RuleElem<T>>,
 }
 
-impl<T: TokenTag> From<(RuleElem<T>, Vec<RuleElem<T>>)> for Rule<T> {
-    fn from((lhs, rhs): (RuleElem<T>, Vec<RuleElem<T>>)) -> Self
-    {
-        Rule { id: 0, lhs, rhs }
-    }
-}
+impl<T, R> Display for Rule<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} ->", self.lhs)?;
+        for elem in &self.rhs {
+            write!(f, " {}", elem)?;
+        }
+        write!(f, "")
+    }
+}
+
+impl<T, R> Debug for Rule<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} ({})", self, self.id)
+    }
+}
+
+impl<T, R> PartialEq for Rule<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn eq(&self, other: &Self) -> bool {
+        self.tag == other.tag && self.lhs == other.lhs && self.rhs == other.rhs
+    }
+}
+
+impl<T, R> Hash for Rule<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.tag.hash(state);
+        self.lhs.hash(state);
+        self.rhs.hash(state);
+    }
+}
 
-impl<T: TokenTag> Rule<T> {
+impl<T, R> Rule<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn new(tag: Option<R>, lhs: RuleElem<T>, rhs: Vec<RuleElem<T>>) -> Self {
+        Rule { id: 0, tag, lhs, rhs }
+    }
+
     pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem<T>> {
         let mut l_nonterms = vec![&self.lhs];
         let r_nonterms: Vec<&RuleElem<T>> = self
@@ -44,28 +103,41 @@ impl<T: TokenTag> Rule<T> {
     }
 }
 
-#[derive(Debug, Clone, Eq)]
+#[derive(Clone, Hash, Eq, Serialize, Deserialize)]
 pub enum RuleElem<T: TokenTag> {
+    #[serde(bound(
+        serialize = "T: Serialize",
+        deserialize = "T: Deserialize<'de>",
+    ))]
     NonTerm(String),
     Term(T),
+    Epsilon,
     EOF,
 }
 
-impl<T: TokenTag> Hash for RuleElem<T> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+impl<T: TokenTag> Display for RuleElem<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            RuleElem::NonTerm(s) => s.hash(state),
-            RuleElem::Term(t) => t.hash(state),
-            RuleElem::EOF => 0.hash(state),
+            RuleElem::NonTerm(s) => write!(f, "<{}>", s),
+            RuleElem::Term(t) => write!(f, "{:?}", t.as_str()),
+            RuleElem::Epsilon => write!(f, "ε"),
+            RuleElem::EOF => write!(f, "$"),
         }
     }
 }
 
+impl<T: TokenTag> Debug for RuleElem<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self)
+    }
+}
+
 impl<T: TokenTag> PartialEq for RuleElem<T> {
     fn eq(&self, other: &Self) -> bool {
         match (self, other) {
             (RuleElem::NonTerm(s1), RuleElem::NonTerm(s2)) => s1 == s2,
             (RuleElem::Term(t1), RuleElem::Term(t2)) => t1 == t2,
+            (RuleElem::Epsilon, RuleElem::Epsilon) => true,
             (RuleElem::EOF, RuleElem::EOF) => true,
             _ => false,
         }
     }
 }
@@ -83,15 +155,23 @@ impl<T: TokenTag> RuleElem<T> {
 }
 
 #[derive(Debug, Clone)]
-pub struct RuleSet<T: TokenTag> {
+pub struct RuleSet<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
     pub top: String,
-    pub rules: Vec<Rule<T>>,
+    pub rules: Vec<Rule<T, R>>,
 }
 
-impl<T: TokenTag> FromIterator<Rule<T>> for RuleSet<T> {
+impl<T, R> FromIterator<Rule<T, R>> for RuleSet<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
     fn from_iter<I>(rules: I) -> Self
     where
-        I: IntoIterator<Item = Rule<T>>,
+        I: IntoIterator<Item = Rule<T, R>>,
     {
         let rules = rules.into_iter().collect::<Vec<_>>();
         let top = match &rules[0].lhs {
@@ -102,299 +182,30 @@ impl<T: TokenTag> FromIterator<Rule<T>> for RuleSet<T> {
     }
 }
 
-impl<T: TokenTag> RuleSet<T> {
-    pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem<T>> {
+impl<T, R> RuleSet<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn update_top(&mut self, rule: Rule<T, R>) {
+        if let RuleElem::NonTerm(top) = &rule.lhs {
+            self.top = top.to_string();
+        }
+        self.rules.push(rule);
+    }
+
+    pub fn nonterms<'a>(&'a self) -> HashSet<&'a RuleElem<T>> {
         self.rules.iter().flat_map(|rule| rule.nonterms()).collect()
     }
 
-    pub fn terms<'a>(&'a self) -> Vec<&'a RuleElem<T>> {
+    pub fn terms<'a>(&'a self) -> HashSet<&'a RuleElem<T>> {
         self.rules.iter().flat_map(|rule| rule.terms()).collect()
     }
 
-    pub fn find_rule<'a>(&'a self, target: &RuleElem<T>) -> Vec<&'a Rule<T>> {
+    pub fn find_rule<'a>(&'a self, target: &RuleElem<T>) -> Vec<&'a Rule<T, R>> {
         self.rules
             .iter()
            .filter(|rule|
&rule.lhs == target) .collect() } - - pub fn first_set<'a>(&'a self) -> HashMap<&'a RuleElem, Vec<&'a RuleElem>> { - // 1. Calc a null set - let nulls_set = self.nulls_set(); - - // 2. Initialize a first set - let mut first_set: HashMap<&RuleElem, Vec<&RuleElem>> = HashMap::new(); - first_set.insert(&RuleElem::EOF, vec![&RuleElem::EOF]); - self.terms().into_iter().for_each(|relem| { - first_set.insert(relem, vec![relem]); - }); - self.nonterms().into_iter().for_each(|relem| { - first_set.insert(relem, vec![]); - }); - - // 3. List up candidates from a nonterm set - let mut candidates = vec![]; - for nonterm in self.nonterms() { - let rules = self.find_rule(nonterm); - for rule in rules { - for relem in &rule.rhs { - if &rule.lhs != relem { - candidates.push((nonterm, relem)) - } - if !nulls_set.contains(&relem) { - break; - } - } - } - } - - // 4. Find first set with recursive - let mut updated = true; - while updated { - updated = false; - for (nonterm, candidate) in &candidates { - let found_elems: Vec<&RuleElem> = first_set - .get(candidate) - .unwrap() - .iter() - .filter(|relem| !first_set.get(nonterm).unwrap().contains(relem)) - .copied() - .collect(); - updated = !found_elems.is_empty(); - first_set - .get_mut(nonterm) - .unwrap() - .extend(found_elems.into_iter()); - } - } - - first_set - } - - fn nulls_set<'a>(&'a self) -> Vec<&'a RuleElem> { - // 1. Find null rules - let mut nulls_set: Vec<&RuleElem> = self - .rules - .iter() - .filter(|rule| rule.rhs.is_empty()) - .map(|rule| &rule.lhs) - .collect(); - - // 2. Find null rules with recursive - let mut updated = true; - while updated { - updated = false; - for rule in &self.rules { - if nulls_set.contains(&&rule.lhs) { - continue; - } else if rule.rhs.iter().all(|relem| nulls_set.contains(&relem)) { - nulls_set.push(&rule.lhs); - updated = true; - } else { - continue; - } - } - } - - nulls_set - } } - -// #[cfg(test)] -// mod test { -// use std::collections::HashMap; - -// use crate::token::TokenTag; -// use crate::RuleKind; - -// use super::{Rule, RuleElem}; - -// #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] -// enum TestToken { -// Num, -// Plus, -// Minus, -// Mul, -// Div, -// BracketA, -// BracketB, -// } - -// impl TokenKind<'_> for TestToken { -// fn as_str(&self) -> &'static str { -// match self { -// TestToken::Num => r"^[1-9][0-9]*", -// TestToken::Plus => r"^\+", -// TestToken::Minus => r"^-", -// TestToken::Mul => r"^\*", -// TestToken::Div => r"^/", -// TestToken::BracketA => r"^\(", -// TestToken::BracketB => r"^\)", -// } -// } - -// fn ignore_str() -> &'static str { -// r"^[ \t\n]+" -// } - -// fn into_iter() -> impl Iterator { -// vec![ -// TestToken::Num, -// TestToken::Plus, -// TestToken::Minus, -// TestToken::Mul, -// TestToken::Div, -// TestToken::BracketA, -// TestToken::BracketB, -// ] -// .into_iter() -// } -// } - -// #[derive(Debug, Clone, Hash, PartialEq, Eq)] -// enum TestRule { -// ExprPlus, -// ExprMinus, -// Expr2Term, -// TermMul, -// TermDiv, -// Term2Fact, -// Fact2Expr, -// Fact2Num, -// } - -// impl<'a> RuleKind<'a> for TestRule { -// type TokenKind = TestToken; - -// fn into_iter() -> impl Iterator { -// Box::new( -// vec![ -// TestRule::ExprPlus, -// TestRule::ExprMinus, -// TestRule::Expr2Term, -// TestRule::TermMul, -// TestRule::TermDiv, -// TestRule::Term2Fact, -// TestRule::Fact2Expr, -// TestRule::Fact2Num, -// ] -// .into_iter(), -// ) -// } - -// fn into_rules(&self) -> Vec> { -// let expr_plus = Rule::from(( -// RuleElem::new_nonterm("expr"), -// vec![ -// 
RuleElem::new_nonterm("expr"), -// RuleElem::new_term(TestToken::Plus), -// RuleElem::new_nonterm("term"), -// ], -// )); - -// let expr_minus = Rule::from(( -// RuleElem::new_nonterm("expr"), -// vec![ -// RuleElem::new_nonterm("expr"), -// RuleElem::new_term(TestToken::Minus), -// RuleElem::new_nonterm("term"), -// ], -// )); - -// let expr_2_term = Rule::::from(( -// RuleElem::new_nonterm("expr"), -// vec![RuleElem::new_nonterm("term")], -// )); - -// let term_mul = Rule::from(( -// RuleElem::new_nonterm("term"), -// vec![ -// RuleElem::new_nonterm("term"), -// RuleElem::new_term(TestToken::Mul), -// RuleElem::new_nonterm("fact"), -// ], -// )); - -// let term_div = Rule::from(( -// RuleElem::new_nonterm("term"), -// vec![ -// RuleElem::new_nonterm("term"), -// RuleElem::new_term(TestToken::Div), -// RuleElem::new_nonterm("fact"), -// ], -// )); - -// let term_2_fact = Rule::::from(( -// RuleElem::new_nonterm("term"), -// vec![RuleElem::new_nonterm("fact")], -// )); - -// let fact_2_expr = Rule::from(( -// RuleElem::new_nonterm("fact"), -// vec![ -// RuleElem::new_term(TestToken::BracketA), -// RuleElem::new_nonterm("expr"), -// RuleElem::new_term(TestToken::BracketB), -// ], -// )); - -// let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![])); - -// match self { -// TestRule::ExprPlus => vec![expr_plus], -// TestRule::ExprMinus => vec![expr_minus], -// TestRule::Expr2Term => vec![expr_2_term], -// TestRule::TermMul => vec![term_mul], -// TestRule::TermDiv => vec![term_div], -// TestRule::Term2Fact => vec![term_2_fact], -// TestRule::Fact2Expr => vec![fact_2_expr], -// TestRule::Fact2Num => vec![fact_2_num], -// } -// } -// } - -// fn check>( -// first_set: &HashMap<&RuleElem, Vec<&RuleElem>>, -// nonterm: T, -// exp_terms: Vec, -// ) { -// let nonterms = RuleElem::::new_nonterm(nonterm); -// let exp_terms: Vec> = exp_terms -// .into_iter() -// .map(|term| RuleElem::new_term(term)) -// .collect(); -// assert!(first_set.get(&nonterms).unwrap().len() == exp_terms.len()); - -// let result = first_set -// .get(&nonterms) -// .unwrap() -// .into_iter() -// .zip(exp_terms.into_iter()) -// .any(|(a, b)| a == &&b); -// assert!(result); -// } - -// #[test] -// fn first_set() { -// let ruleset = ::into_ruleset(); -// let first_set = ruleset.first_set(); - -// check( -// &first_set, -// "expr", -// vec![ -// TestToken::Plus, -// TestToken::Minus, -// TestToken::Mul, -// TestToken::Div, -// TestToken::BracketA, -// ], -// ); -// check( -// &first_set, -// "term", -// vec![TestToken::Mul, TestToken::Div, TestToken::BracketA], -// ); -// check(&first_set, "fact", vec![TestToken::BracketA]); -// } -// } diff --git a/crates/cfg/src/token.rs b/crates/cfg/src/token.rs index 1469f80..8e47435 100644 --- a/crates/cfg/src/token.rs +++ b/crates/cfg/src/token.rs @@ -1,6 +1,8 @@ use std::fmt::Debug; use std::hash::Hash; +use serde::{Serialize, Deserialize}; + pub trait TokenTag where Self: Debug + Copy + Clone + Hash + Eq, @@ -8,7 +10,7 @@ where fn as_str<'a, 'b>(&'a self) -> &'b str; } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] pub struct Token<'input, T: TokenTag> { pub kind: T, pub src: &'input str, diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 6d8bf34..6f5d292 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -19,5 +19,5 @@ copager_core = { path = "." 
} copager_lex = { path = "../lex", features = ["derive"]} copager_lex_regex = { path = "../lex_regex" } copager_parse = { path = "../parse", features = ["derive"] } -copager_parse_lr1 = { path = "../parse_lr1" } +copager_parse_lr_lr1 = { path = "../parse_lr_lr1" } copager_ir_void = { path = "../ir_void" } diff --git a/crates/core/tests/prebuild.rs b/crates/core/tests/prebuild.rs index af7db01..2812df9 100644 --- a/crates/core/tests/prebuild.rs +++ b/crates/core/tests/prebuild.rs @@ -8,7 +8,7 @@ use copager_cfg::rule::{RuleTag, Rule, RuleElem}; use copager_lex::LexSource; use copager_lex_regex::RegexLexer; use copager_parse::ParseSource; -use copager_parse_lr1::LR1; +use copager_parse_lr_lr1::LR1; use copager_ir_void::Void; #[derive( diff --git a/crates/core/tests/simple.rs b/crates/core/tests/simple.rs index 4a62fd6..c65b85a 100644 --- a/crates/core/tests/simple.rs +++ b/crates/core/tests/simple.rs @@ -6,7 +6,7 @@ use copager_cfg::rule::{RuleTag, Rule, RuleElem}; use copager_lex::LexSource; use copager_lex_regex::RegexLexer; use copager_parse::ParseSource; -use copager_parse_lr1::LR1; +use copager_parse_lr_lr1::LR1; use copager_ir_void::Void; #[derive( diff --git a/crates/core/tests/simple_multiple.rs b/crates/core/tests/simple_multiple.rs index 5e8ebc0..8697c12 100644 --- a/crates/core/tests/simple_multiple.rs +++ b/crates/core/tests/simple_multiple.rs @@ -6,7 +6,7 @@ use copager_cfg::rule::{RuleTag, Rule, RuleElem}; use copager_lex::LexSource; use copager_lex_regex::RegexLexer; use copager_parse::ParseSource; -use copager_parse_lr1::LR1; +use copager_parse_lr_lr1::LR1; use copager_ir_void::Void; #[derive( diff --git a/crates/ir_sexp/Cargo.toml b/crates/ir_sexp/Cargo.toml index b3e15ca..a3c4ec8 100644 --- a/crates/ir_sexp/Cargo.toml +++ b/crates/ir_sexp/Cargo.toml @@ -12,8 +12,9 @@ copager_parse = { path = "../parse" } copager_ir = { path = "../ir" } [dev-dependencies] +copager_core = { path = "../core" } copager_lex = { path = "../lex", features = ["derive"] } copager_lex_regex = { path = "../lex_regex" } copager_parse = { path = "../parse", features = ["derive"] } -copager_parse_lr1 = { path = "../parse_lr1" } +copager_parse_lr_lr1 = { path = "../parse_lr_lr1" } copager_ir_sexp = { path = "." 
}
diff --git a/crates/ir_sexp/tests/simple.rs b/crates/ir_sexp/tests/simple.rs
index 0f42f78..31e983b 100644
--- a/crates/ir_sexp/tests/simple.rs
+++ b/crates/ir_sexp/tests/simple.rs
@@ -1,10 +1,10 @@
+use copager_core::{Grammar, Processor};
 use copager_cfg::token::TokenTag;
 use copager_cfg::rule::{RuleTag, Rule, RuleElem};
-use copager_lex::{LexSource, LexDriver};
+use copager_lex::LexSource;
 use copager_lex_regex::RegexLexer;
-use copager_parse::{ParseSource, ParseDriver, ParseEvent};
-use copager_parse_lr1::LR1;
-use copager_ir::{IR, IRBuilder};
+use copager_parse::ParseSource;
+use copager_parse_lr_lr1::LR1;
 use copager_ir_sexp::SExp;
 
 #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
@@ -44,10 +44,6 @@ enum ExprRule {
     Num,
 }
 
-type MyLexer = RegexLexer<ExprToken>;
-type MyParser = LR1<ExprToken, ExprRule>;
-type MyIR = SExp<'static, ExprToken, ExprRule>;
-
 #[test]
 fn simple_display() {
     let ir = parse("1");
@@ -68,28 +64,15 @@ fn simple_eval() {
 }
 
 fn parse<'input>(input: &'input str) -> anyhow::Result<SExp<'static, ExprToken, ExprRule>> {
-    let source = ExprToken::default();
-    let lexer = <MyLexer as LexDriver<ExprToken>>::try_from(source).unwrap();
-
-    let source = (ExprToken::default(), ExprRule::default());
-    let parser = <MyParser as ParseDriver<ExprToken, ExprRule>>::try_from(source).unwrap();
-
-    let mut ir_builder = <MyIR as IR<ExprToken, ExprRule>>::Builder::new();
-    for event in parser.run(lexer.run(input)) {
-        match event {
-            ParseEvent::Read(token) => {
-                ir_builder.on_read(token).unwrap();
-            }
-            ParseEvent::Parse { rule, len } => {
-                ir_builder.on_parse(rule, len).unwrap();
-            }
-            ParseEvent::Err(err) => {
-                return Err(anyhow::anyhow!("{:?}", err));
-            }
-        }
-    }
+    type TestLang = Grammar<ExprToken, ExprRule>;
+    type TestLexer = RegexLexer<ExprToken>;
+    type TestParser = LR1<ExprToken, ExprRule>;
+    type TestProcessor = Processor<TestLang, TestLexer, TestParser>;
 
-    ir_builder.build()
+    TestProcessor::new()
+        .build_lexer()?
+        .build_parser()?
+        .process::<SExp<ExprToken, ExprRule>>(input)
 }
 
 fn eval(ir: &SExp<'static, ExprToken, ExprRule>) -> i32 {
diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs
index 760f467..f259836 100644
--- a/crates/parse/src/lib.rs
+++ b/crates/parse/src/lib.rs
@@ -9,7 +9,7 @@ pub trait ParseSource<T: TokenTag> {
 
     fn iter(&self) -> impl Iterator<Item = Self::Tag>;
 
-    fn into_ruleset(&self) -> RuleSet<T> {
+    fn into_ruleset(&self) -> RuleSet<T, Self::Tag> {
         let set_id_for_all = |(id, tag): (usize, Self::Tag)| {
             tag.as_rules()
                 .into_iter()
@@ -22,7 +22,7 @@ pub trait ParseSource<T: TokenTag> {
         self.iter()
             .enumerate()
             .flat_map(set_id_for_all)
-            .collect::<RuleSet<T>>()
+            .collect::<RuleSet<T, Self::Tag>>()
     }
 }
diff --git a/crates/parse_common/Cargo.toml b/crates/parse_common/Cargo.toml
new file mode 100644
index 0000000..0351c48
--- /dev/null
+++ b/crates/parse_common/Cargo.toml
@@ -0,0 +1,15 @@
+cargo-features = ["edition2024"]
+
+[package]
+name = "copager_parse_common"
+version = "0.2.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+copager_cfg = { path = "../cfg" }
+
+[dev-dependencies]
+copager_lex = { path = "../lex", features = ["derive"] }
+copager_parse = { path = "../parse", features = ["derive"] }
diff --git a/crates/parse_common/src/lib.rs b/crates/parse_common/src/lib.rs
new file mode 100644
index 0000000..90d8760
--- /dev/null
+++ b/crates/parse_common/src/lib.rs
@@ -0,0 +1 @@
+pub mod rule;
diff --git a/crates/parse_common/src/rule.rs b/crates/parse_common/src/rule.rs
new file mode 100644
index 0000000..6eb0269
--- /dev/null
+++ b/crates/parse_common/src/rule.rs
@@ -0,0 +1,7 @@
+mod first;
+mod follow;
+mod director;
+
+pub use first::FirstSet;
+pub use follow::FollowSet;
+pub use director::DirectorSet;
diff --git a/crates/parse_common/src/rule/director.rs b/crates/parse_common/src/rule/director.rs
new file mode 100644
index
0000000..a80f7d0
--- /dev/null
+++ b/crates/parse_common/src/rule/director.rs
@@ -0,0 +1,178 @@
+use std::collections::{HashMap, HashSet};
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
+
+use crate::rule::{FirstSet, FollowSet};
+
+pub struct DirectorSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    map: HashMap<&'a Rule<T, R>, Vec<&'a RuleElem<T>>>,
+}
+
+impl<'a, T, R> From<&'a RuleSet<T, R>> for DirectorSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(ruleset: &'a RuleSet<T, R>) -> Self {
+        let build = DirectorSetBuilder::from(ruleset).calc();
+        let map = build.map
+            .into_iter()
+            .map(|(k, v)| (k, v.into_iter().collect()))
+            .collect();
+
+        DirectorSet { map }
+    }
+}
+
+impl<'a, T, R> DirectorSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn get(&self, rule: &Rule<T, R>) -> Option<&[&'a RuleElem<T>]> {
+        self.map.get(rule).map(|elems| elems.as_slice())
+    }
+}
+
+struct DirectorSetBuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    map: HashMap<&'a Rule<T, R>, HashSet<&'a RuleElem<T>>>,
+    ruleset: &'a RuleSet<T, R>,
+    first_set: FirstSet<'a, T, R>,
+    follow_set: FollowSet<'a, T, R>,
+}
+
+impl<'a, T, R> From<&'a RuleSet<T, R>> for DirectorSetBuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(ruleset: &'a RuleSet<T, R>) -> Self {
+        let first_set = FirstSet::from(ruleset);
+        let follow_set = FollowSet::from(ruleset);
+
+        DirectorSetBuilder {
+            map: HashMap::new(),
+            ruleset,
+            first_set,
+            follow_set,
+        }
+    }
+}
+
+impl<'a, T, R> DirectorSetBuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn calc(mut self) -> Self {
+        for rule in &self.ruleset.rules {
+            self.calc_once(rule);
+        }
+        self
+    }
+
+    fn calc_once(&mut self, rule: &'a Rule<T, R>) {
+        let lhs = match &rule.lhs {
+            RuleElem::NonTerm(s) => s.as_str(),
+            _ => unreachable!(),
+        };
+
+        let rhs_firsts = self.first_set.get_by(&rule.rhs).to_vec();
+        let cand_elems = if !rhs_firsts.contains(&&RuleElem::Epsilon) {
+            rhs_firsts
+        } else {
+            let mut cand_elems = rhs_firsts.to_vec();
+            cand_elems.extend_from_slice(self.follow_set.get(&lhs).unwrap());
+            cand_elems
+        };
+
+        let director_elems = cand_elems
+            .into_iter()
+            .filter(|&e| *e != RuleElem::Epsilon)
+            .collect();
+        self.map.insert(rule, director_elems);
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use copager_cfg::token::TokenTag;
+    use copager_cfg::rule::{Rule, RuleTag, RuleElem};
+    use copager_lex::LexSource;
+    use copager_parse::ParseSource;
+
+    use super::DirectorSet;
+
+    #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+    enum TestToken {
+        #[token(r"a")]
+        A,
+        #[token(r"b")]
+        B,
+    }
+
+    #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+    enum TestRule {
+        #[default]
+        #[rule("<S> ::= <A> <B>")]
+        S,
+        #[rule("<A> ::= A")]
+        A,
+        #[rule("<B> ::= <S> B")]
+        B,
+        #[rule("<C> ::= ")]
+        C,
+    }
+
+    fn eq_symbols<T>(lhs: &[&RuleElem<T>], rhs: &[RuleElem<T>]) -> bool
+    where
+        T: TokenTag,
+    {
+        if lhs.len() != rhs.len() {
+            println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+            return false;
+        }
+        for lelem in lhs {
+            if !rhs.contains(lelem) {
+                println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+                return false;
+            }
+        }
+        return true;
+    }
+
+    #[test]
+    fn director_set() {
+        macro_rules! term {
+            ($expr:ident) => { RuleElem::new_term(TestToken::$expr) };
+        }
+
+        let ruleset = TestRule::default().into_ruleset();
+        let director_set = DirectorSet::from(&ruleset);
+
+        let rule = &TestRule::S.as_rules()[0];
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice()));
+
+        let rule = &TestRule::A.as_rules()[0];
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice()));
+
+        let rule = &TestRule::B.as_rules()[0];
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice()));
+
+        let rule = &TestRule::C.as_rules()[0];
+        let expected = vec![];
+        assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice()));
+    }
+}
diff --git a/crates/parse_common/src/rule/first.rs b/crates/parse_common/src/rule/first.rs
new file mode 100644
index 0000000..dd3dbd6
--- /dev/null
+++ b/crates/parse_common/src/rule/first.rs
@@ -0,0 +1,210 @@
+use std::collections::{HashMap, HashSet};
+use std::marker::PhantomData;
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleElem, RuleSet, RuleTag};
+
+pub struct FirstSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    map: HashMap<&'a RuleElem<T>, Vec<&'a RuleElem<T>>>,
+    _phantom: PhantomData<R>,
+}
+
+impl<'a, T, R> From<&'a RuleSet<T, R>> for FirstSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(ruleset: &'a RuleSet<T, R>) -> Self {
+        let build = FirstSetBuilder::from(ruleset).expand();
+        let map = build.map
+            .into_iter()
+            .map(|(k, v)| (k, v.into_iter().collect()))
+            .collect();
+
+        FirstSet {
+            map,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<'a, T, R> FirstSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn get(&self, relem: &RuleElem<T>) -> Option<&[&'a RuleElem<T>]> {
+        self.map.get(relem).map(|terms| terms.as_slice())
+    }
+
+    pub fn get_by(&self, relems: &[RuleElem<T>]) -> Vec<&'a RuleElem<T>> {
+        if relems.is_empty() {
+            vec![&RuleElem::EOF]
+        } else {
+            let mut firsts: HashSet<&'a RuleElem<T>> = HashSet::new();
+            for relem in relems {
+                let first_candidates = self.map.get(relem).unwrap();
+                firsts.extend(first_candidates);
+                if firsts.contains(&RuleElem::Epsilon) {
+                    firsts.remove(&RuleElem::Epsilon);
+                    continue
+                }
+                break
+            }
+            firsts.into_iter().collect()
+        }
+    }
+}
+
+struct FirstSetBuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    map: HashMap<&'a RuleElem<T>, HashSet<&'a RuleElem<T>>>,
+    ruleset: &'a RuleSet<T, R>,
+    nonterms: Vec<&'a RuleElem<T>>,
+}
+
+impl<'a, T, R> From<&'a RuleSet<T, R>> for FirstSetBuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(ruleset: &'a RuleSet<T, R>) -> Self {
+        let mut map = HashMap::new();
+        ruleset.nonterms().iter().for_each(|&nonterm| {
+            map.insert(nonterm, HashSet::new());
+        });
+        ruleset.terms().iter().for_each(|&term| {
+            map.insert(term, HashSet::new());
+            map.get_mut(term).unwrap().insert(term);
+        });
+        map.insert(&RuleElem::EOF, HashSet::new());
+        map.get_mut(&RuleElem::EOF).unwrap().insert(&RuleElem::EOF);
+
+        let nonterms = ruleset.nonterms().into_iter().collect();
+
+        FirstSetBuilder {
+            map,
+            ruleset,
+            nonterms,
+        }
+    }
+}
+
+impl<'a, T, R> FirstSetBuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn expand(mut self) -> Self {
+        while self.expand_child() {}
+        self
+    }
+
+    fn expand_child(&mut self) -> bool {
+        let mut modified = false;
+        for &nonterm in &self.nonterms {
+            let old_len = self.map.get(nonterm).unwrap().len();
+            for first_symbol in rhs_first_symbol(self.ruleset, nonterm) {
+                if matches!(first_symbol, RuleElem::NonTerm(_)) {
+                    let cand_terms = self.map.get(first_symbol).unwrap().clone();
+                    self.map.get_mut(nonterm).unwrap().extend(cand_terms);
+                } else {
+                    self.map.get_mut(nonterm).unwrap().insert(first_symbol);
+                }
+            }
+            modified |= old_len != self.map.get(nonterm).unwrap().len();
+        }
+        modified
+    }
+}
+
+fn rhs_first_symbol<'a, T, R>(ruleset: &'a RuleSet<T, R>, nonterm: &RuleElem<T>) -> impl Iterator<Item = &'a RuleElem<T>>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    ruleset.rules
+        .iter()
+        .filter(move |&rule| &rule.lhs == nonterm)
+        .flat_map(|rule| rule.rhs.first())
+}
+
+#[cfg(test)]
+mod test {
+    use copager_cfg::token::TokenTag;
+    use copager_cfg::rule::{Rule, RuleTag, RuleElem};
+    use copager_lex::LexSource;
+    use copager_parse::ParseSource;
+
+    use super::FirstSet;
+
+    #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+    enum TestToken {
+        #[token(r"a")]
+        A,
+        #[token(r"b")]
+        B,
+    }
+
+    #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+    enum TestRule {
+        #[default]
+        #[rule("<S> ::= <A> <B>")]
+        S,
+        #[rule("<A> ::= A")]
+        A,
+        #[rule("<B> ::= <S> B")]
+        B,
+        #[rule("<C> ::= ")]
+        C,
+    }
+
+    fn eq_symbols<T>(lhs: &[&RuleElem<T>], rhs: &[RuleElem<T>]) -> bool
+    where
+        T: TokenTag,
+    {
+        if lhs.len() != rhs.len() {
+            println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+            return false;
+        }
+        for lelem in lhs {
+            if !rhs.contains(lelem) {
+                println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+                return false;
+            }
+        }
+        return true;
+    }
+
+    #[test]
+    fn first_set() {
+        macro_rules! term {
+            ($ident:ident) => { RuleElem::new_term(TestToken::$ident) };
+        }
+        macro_rules! nonterm {
+            ($expr:expr) => { RuleElem::new_nonterm($expr) };
+        }
+
+        let ruleset = TestRule::default().into_ruleset();
+        let first_set = FirstSet::from(&ruleset);
+
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(first_set.get(&nonterm!("S")).unwrap(), expected.as_slice()));
+
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(first_set.get(&nonterm!("A")).unwrap(), expected.as_slice()));
+
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(first_set.get(&nonterm!("B")).unwrap(), expected.as_slice()));
+
+        let expected = vec![RuleElem::Epsilon];
+        assert!(eq_symbols(first_set.get(&nonterm!("C")).unwrap(), expected.as_slice()));
+    }
+}
diff --git a/crates/parse_common/src/rule/follow.rs b/crates/parse_common/src/rule/follow.rs
new file mode 100644
index 0000000..f637c31
--- /dev/null
+++ b/crates/parse_common/src/rule/follow.rs
@@ -0,0 +1,199 @@
+use std::collections::{HashMap, HashSet};
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleElem, RuleSet, RuleTag};
+
+use crate::rule::FirstSet;
+
+pub struct FollowSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    map: HashMap<String, Vec<&'a RuleElem<T>>>,
+    _ruleset: &'a RuleSet<T, R>,
+}
+
+impl<'a, T, R> From<&'a RuleSet<T, R>> for FollowSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(ruleset: &'a RuleSet<T, R>) -> Self {
+        let build = FollowSetBuilder::from(ruleset).expand();
+        let map = build.map
+            .into_iter()
+            .map(|(k, v)| (k, v.into_iter().collect()))
+            .collect();
+
+        FollowSet {
+            map,
+            _ruleset: ruleset,
+        }
+    }
+}
+
+impl<'a, T, R> FollowSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn get(&self, nonterm: &str) -> Option<&[&'a RuleElem<T>]> {
+        self.map.get(nonterm).map(|terms| terms.as_slice())
+    }
+}
+
+pub struct FollowSetBuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    map: HashMap<String, HashSet<&'a RuleElem<T>>>,
+    ruleset: &'a RuleSet<T, R>,
+}
+
+impl<'a, T, R> From<&'a RuleSet<T, R>> for FollowSetBuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(ruleset: &'a RuleSet<T, R>) -> Self {
+        let mut map = HashMap::new();
+        for nonterm in ruleset.nonterms() {
+            if let RuleElem::NonTerm(nonterm) = nonterm {
+                map.insert(nonterm.clone(), HashSet::new());
+            }
+        }
+        map.get_mut(&ruleset.top).unwrap().insert(&RuleElem::EOF);
+
+        FollowSetBuilder {
+            map,
+            ruleset,
+        }
+    }
+}
+
+impl<'a, T, R> FollowSetBuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn expand(mut self) -> Self {
+        while self.expand_child() {}
+        self
+    }
+
+    fn expand_child(&mut self) -> bool {
+        let first_set = FirstSet::from(self.ruleset);
+
+        let mut modified = false;
+        for rule in &self.ruleset.rules {
+            let lhs = match &rule.lhs {
+                RuleElem::NonTerm(s) => s.as_str(),
+                _ => unreachable!(),
+            };
+            for rhs_idx in 0..rule.rhs.len() {
+                let target = &rule.rhs[rhs_idx];
+                let follow_symbols = &rule.rhs[rhs_idx+1..];
+                let prob_first_symbols = first_set.get_by(follow_symbols);
+                modified |= self.append_by_first(target, &prob_first_symbols);
+                if prob_first_symbols.contains(&&RuleElem::Epsilon) {
+                    modified |= self.append_when_nullable(target, lhs);
+                }
+            }
+        }
+        modified
+    }
+
+    fn append_by_first(&mut self, target: &RuleElem<T>, first_symbol: &[&'a RuleElem<T>]) -> bool {
+        if let RuleElem::NonTerm(nonterm) = target {
+            let old_idx = self.map.get(nonterm).unwrap().len();
+            let first_symbol = first_symbol.iter().filter(|relem| **relem != &RuleElem::Epsilon);
+            self.map.get_mut(nonterm).unwrap().extend(first_symbol);
+            old_idx != self.map.get(nonterm).unwrap().len()
+        } else {
+            false
+        }
+    }
+
+    fn append_when_nullable(&mut self, target: &RuleElem<T>, lhs: &str) -> bool {
+        if let RuleElem::NonTerm(nonterm) = target {
+            let lhs_follow = self.map.get(lhs).unwrap().clone();
+            let old_idx = self.map.get(nonterm).unwrap().len();
+            self.map.get_mut(nonterm).unwrap().extend(lhs_follow);
+            old_idx != self.map.get(nonterm).unwrap().len()
+        } else {
+            false
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use copager_cfg::token::TokenTag;
+    use copager_cfg::rule::{Rule, RuleTag, RuleElem};
+    use copager_lex::LexSource;
+    use copager_parse::ParseSource;
+
+    use super::FollowSet;
+
+    #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+    enum TestToken {
+        #[token(r"a")]
+        A,
+        #[token(r"b")]
+        B,
+    }
+
+    #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+    enum TestRule {
+        #[default]
+        #[rule("<S> ::= <A> <B>")]
+        S,
+        #[rule("<A> ::= A")]
+        A,
+        #[rule("<B> ::= <S> B")]
+        B,
+        #[rule("<C> ::= ")]
+        C,
+    }
+
+    fn eq_symbols<T>(lhs: &[&RuleElem<T>], rhs: &[RuleElem<T>]) -> bool
+    where
+        T: TokenTag,
+    {
+        if lhs.len() != rhs.len() {
+            println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+            return false;
+        }
+        for lelem in lhs {
+            if !rhs.contains(lelem) {
+                println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+                return false;
+            }
+        }
+        return true;
+    }
+
+    #[test]
+    fn follow_set() {
+        macro_rules! term {
+            ($expr:ident) => { RuleElem::new_term(TestToken::$expr) };
+        }
+
+        let ruleset = TestRule::default().into_ruleset();
+        let follow_set = FollowSet::from(&ruleset);
+
+        let expected = vec![term!(B), RuleElem::EOF];
+        assert!(eq_symbols(follow_set.get("S").unwrap(), expected.as_slice()));
+
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(follow_set.get("A").unwrap(), expected.as_slice()));
+
+        let expected = vec![term!(B), RuleElem::EOF];
+        assert!(eq_symbols(follow_set.get("B").unwrap(), expected.as_slice()));
+
+        let expected = vec![];
+        assert!(eq_symbols(follow_set.get("C").unwrap(), expected.as_slice()));
+    }
+}
diff --git a/crates/parse_derive/src/impl/rule.rs b/crates/parse_derive/src/impl/rule.rs
index 5031024..6855b79 100644
--- a/crates/parse_derive/src/impl/rule.rs
+++ b/crates/parse_derive/src/impl/rule.rs
@@ -29,7 +29,7 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream {
     quote! {
         impl RuleTag<#enum_assoc_type> for #enum_name {
-            fn as_rules(&self) -> Vec<Rule<#enum_assoc_type>> {
+            fn as_rules(&self) -> Vec<Rule<#enum_assoc_type, Self>> {
                 match self {
                     #( #enum_matcher_table_i2r, )*
                 }
@@ -49,7 +49,7 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream {
 struct VariantInfo<'a> {
     parent_ident: &'a Ident,
     self_ident: &'a Ident,
-    rules: Vec<TokenStream>,
+    rule_lhs_rhs_tuples: Vec<TokenStream>,
 }
 
 impl<'a> VariantInfo<'a> {
@@ -60,18 +60,18 @@ impl<'a> VariantInfo<'a> {
             .parse::<TokenStream>()
             .unwrap();
 
-        let mut rules = vec![];
+        let mut rule_lhs_rhs_tuples = vec![];
         for attr in &variant.attrs {
             if attr.path().is_ident("rule") {
                 let attr = attr.parse_args::<LitStr>().unwrap().value();
-                rules.push(parse_rule(&token_ident, &attr));
+                rule_lhs_rhs_tuples.push(parse_rule(&token_ident, &attr));
             }
         }
 
         VariantInfo {
             parent_ident,
             self_ident,
-            rules,
+            rule_lhs_rhs_tuples,
         }
     }
 
@@ -84,11 +84,11 @@ impl<'a> VariantInfo<'a> {
     fn gen_matcher_ident_to_rule(&self) -> TokenStream {
         let ident = self.gen_ident();
-        if self.rules.is_empty() {
+        if self.rule_lhs_rhs_tuples.is_empty() {
             quote! { #ident => unimplemented!() }
         } else {
-            let rules = &self.rules;
-            quote! { #ident => vec![#(#rules),*] }
+            let lhs_rhs_tuple = &self.rule_lhs_rhs_tuples;
+            quote! { #ident => vec![#(Rule::new(Some(#ident), #lhs_rhs_tuple)),*] }
         }
     }
 }
@@ -112,6 +112,11 @@ fn parse_rule(token: &TokenStream, input: &str) -> TokenStream {
             }
         })
         .collect::<Vec<TokenStream>>();
+    let rhs = if rhs.len() == 0 {
+        vec![quote! { RuleElem::Epsilon }]
+    } else {
+        rhs
+    };
 
-    quote! { Rule::from((#lhs, vec![ #( #rhs, )* ])) }
+    quote!
{ #lhs, vec![ #( #rhs, )* ], } } diff --git a/crates/parse_lr1/src/builder.rs b/crates/parse_lr1/src/builder.rs deleted file mode 100644 index 5978ed8..0000000 --- a/crates/parse_lr1/src/builder.rs +++ /dev/null @@ -1,394 +0,0 @@ -use std::collections::{HashMap, HashSet}; -use std::hash::Hash; - -use itertools::Itertools; -use serde::{Serialize, Deserialize}; - -use copager_cfg::token::TokenTag; -use copager_cfg::rule::{Rule, RuleElem, RuleSet}; -use copager_lex::LexSource; -use copager_parse::ParseSource; - -#[derive(Debug, Serialize, Deserialize)] -pub enum LRAction { - Shift(usize), - Reduce(R, usize, usize), // tag, goto_id, elems_cnt - Accept, - None, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct LR1Configure -where - Sl: LexSource, - Sp: ParseSource, -{ - #[serde(bound( - serialize = "Sl::Tag: Serialize, Sp::Tag: Serialize", - deserialize = "Sl::Tag: Deserialize<'de>, Sp::Tag: Deserialize<'de>", - ))] - pub action_table: Vec>>, - pub eof_action_table: Vec>, - pub goto_table: Vec>, -} - -impl LR1Configure -where - Sl: LexSource, - Sp: ParseSource, -{ - pub fn new(source_l: &Sl, source_p: &Sp) -> anyhow::Result { - // 1. Pre-process - let ruleset = source_p.into_ruleset(); - let first_set = ruleset.first_set(); - - // 2. Generate dummy nonterm - let top_dummy: Rule = Rule::from(( - RuleElem::new_nonterm("__top_dummy"), - vec![RuleElem::new_nonterm(&ruleset.top)], - )); - let top_dummy = vec![LRItem::new( - &top_dummy, - HashSet::from_iter(vec![&RuleElem::EOF]), - )]; - let lr_items = LRItemSet::new(0, HashSet::from_iter(top_dummy)); - let lr_items = lr_items.expand_closure(&ruleset, &first_set); - - // 3. Generate a DFA - let dfa = LRItemDFA::r#gen(lr_items, &ruleset, &first_set); - - // 4. Initialize tables - let mut idx = 0; - let mut nonterm_table = HashMap::new(); - for relem in ruleset.nonterms() { - if let RuleElem::NonTerm(s) = &relem { - if !nonterm_table.contains_key(s) { - nonterm_table.insert(s.to_string(), idx); - idx += 1; - } - } - } - - let mut action_table: Vec>> = Vec::with_capacity(dfa.0.len()); - let mut eof_action_table: Vec> = Vec::with_capacity(dfa.0.len()); - let mut goto_table: Vec> = Vec::with_capacity(dfa.0.len()); - for _ in 0..dfa.0.len() { - action_table.push(HashMap::from_iter( - source_l.iter() - .map(|token| (token, LRAction::None)) - .collect::)>>(), - )); - eof_action_table.push(LRAction::None); - goto_table.push(vec![0; nonterm_table.keys().len()]); - } - - // 5. 
Setup tables - let rule_tags = source_p.iter().collect::>(); - for lritem_set in &dfa.0 { - for (token, next) in &lritem_set.next { - match &token { - RuleElem::NonTerm(s) => { - let id = lritem_set.id as usize; - let label = *nonterm_table.get(s).unwrap(); - goto_table[id][label] = *next as usize; - } - RuleElem::Term(t) => { - let id = lritem_set.id as usize; - let label = action_table[id].get_mut(t).unwrap(); - *label = LRAction::Shift(*next as usize); - } - _ => {} - } - } - - for item in &lritem_set.lr_items { - if item.dot_pos != item.rule.rhs.len() { - continue; - } - if let RuleElem::NonTerm(lhs) = &item.rule.lhs { - for la_token in &item.la_tokens { - if let RuleElem::Term(t) = la_token { - let id = lritem_set.id as usize; - let label = action_table[id].get_mut(t).unwrap(); - *label = LRAction::Reduce( - rule_tags[item.rule.id as usize], - *nonterm_table.get(lhs).unwrap(), - item.rule.rhs.len(), - ); - } - if let RuleElem::EOF = la_token { - let id = lritem_set.id as usize; - eof_action_table[id] = if lhs == "__top_dummy" { - LRAction::Accept - } else { - LRAction::Reduce( - rule_tags[item.rule.id as usize], - *nonterm_table.get(lhs).unwrap(), - item.rule.rhs.len(), - ) - }; - } - } - } - } - } - - Ok(LR1Configure { - action_table, - eof_action_table, - goto_table, - }) - } -} - -#[derive(Debug)] -struct LRItemDFA<'a, T: TokenTag> ( - Vec> -); - -impl<'a, T: TokenTag> LRItemDFA<'a, T> { - fn r#gen( - init_set: LRItemSet<'a, T>, - ruleset: &'a RuleSet, - first_set: &HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> LRItemDFA<'a, T> { - let issue_id = |old_sets: &Vec>, set: &LRItemSet<'a, T>| { - if let Some(ex_set) = old_sets.iter().find(|&set0| set0.strict_eq(set)) { - Err(ex_set.id) - } else { - Ok(old_sets.len() as i32) - } - }; - - // "Expand a closure" <--> "Generate next nodes" loop - let mut loop_idx = (0, 1); - let mut lritem_sets = vec![init_set]; - while loop_idx.0 != loop_idx.1 { - let mut new_found_cnt = 0; - for idx in loop_idx.0..loop_idx.1 { - let next_sets = lritem_sets[idx].gen_next_sets(ruleset, first_set); - for (bef_token, mut next_set) in next_sets { - match issue_id(&lritem_sets, &next_set) { - Ok(id) => { - next_set.id = id; - lritem_sets[idx].next.insert(bef_token, id); - lritem_sets.push(next_set); - new_found_cnt += 1; - } - Err(id) => { - lritem_sets[idx].next.insert(bef_token, id); - } - } - } - } - loop_idx = (loop_idx.1, loop_idx.1 + new_found_cnt); - } - - LRItemDFA(lritem_sets) - } -} - -#[derive(Clone, Debug, Eq)] -struct LRItemSet<'a, T: TokenTag> { - id: i32, - next: HashMap<&'a RuleElem, i32>, - lr_items: HashSet>, -} - -impl<'a, T: TokenTag> PartialEq for LRItemSet<'a, T> { - fn eq(&self, other: &LRItemSet<'a, T>) -> bool { - self.lr_items == other.lr_items - } -} - -impl<'a, T: TokenTag> PartialEq>> for LRItemSet<'a, T> { - fn eq(&self, other: &HashSet>) -> bool { - &self.lr_items == other - } -} - -impl<'a, T: TokenTag> LRItemSet<'a, T> { - fn new(id: i32, lr_items: HashSet>) -> Self { - LRItemSet { - id, - next: HashMap::new(), - lr_items, - } - } - - fn strict_eq(&self, other: &Self) -> bool { - if self.lr_items.len() != other.lr_items.len() { - return false; - } - self.lr_items - .iter() - .all(|item| other.lr_items.iter().any(|item_b| item_b.strict_eq(item))) - } - - fn expand_closure<'b>( - mut self, - ruleset: &'a RuleSet, - first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> LRItemSet<'a, T> { - let mut lr_items = self.lr_items.clone(); - let mut lr_items_fetched = self.lr_items; - loop { - let new_items: Vec> = 
lr_items_fetched - .iter() - .flat_map(|item| item.expand_closure(ruleset, first_set)) - .collect(); - let new_items = LRItem::<'_, _>::unify_all(new_items); - let new_items = HashSet::from_iter(new_items); - - let bef_len = lr_items.len(); - lr_items = LRItem::<'_, _>::unity_set(lr_items, new_items.clone()); - let af_len = lr_items.len(); - if bef_len == af_len { - break; - } - lr_items_fetched = new_items; - } - self.lr_items = lr_items; - - self - } - - fn gen_next_sets<'b>( - &self, - ruleset: &'a RuleSet, - first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> HashMap<&'a RuleElem, LRItemSet<'a, T>> { - let new_items: Vec<(&'a RuleElem, LRItem<'a, T>)> = self - .lr_items - .iter() - .filter_map(|lr_item| lr_item.next_dot()) - .collect(); - - let mut new_sets: HashMap<&RuleElem, HashSet>> = HashMap::new(); - for (bef_token, lr_item) in new_items { - if new_sets.get(&bef_token).is_none() { - new_sets.insert(bef_token, HashSet::new()); - } - new_sets.get_mut(&bef_token).unwrap().insert(lr_item); - } - - let mut new_sets_expanded: HashMap<&'a RuleElem, LRItemSet<'_, _>> = HashMap::new(); - for (ktoken, new_set) in new_sets { - let new_set = LRItemSet::new(0, new_set); - let new_set = new_set.expand_closure(ruleset, first_set); - new_sets_expanded.insert(ktoken, new_set); - } - - new_sets_expanded - } -} - -#[derive(Clone, Debug, Eq)] -struct LRItem<'a, T: TokenTag> { - rule: &'a Rule, - dot_pos: usize, - la_tokens: HashSet<&'a RuleElem>, -} - -impl<'a, T: TokenTag> Hash for LRItem<'a, T> { - fn hash(&self, state: &mut H) { - self.rule.hash(state); - self.dot_pos.hash(state); - } -} - -impl<'a, T: TokenTag> PartialEq for LRItem<'a, T> { - fn eq(&self, other: &Self) -> bool { - self.rule == other.rule && self.dot_pos == other.dot_pos - } -} - -impl<'a, T: TokenTag> LRItem<'a, T> { - fn new(rule: &'a Rule, la_tokens: HashSet<&'a RuleElem>) -> LRItem<'a, T> { - LRItem { - rule, - dot_pos: 0, - la_tokens, - } - } - - fn strict_eq(&self, other: &Self) -> bool { - self.rule == other.rule - && self.dot_pos == other.dot_pos - && self.la_tokens == other.la_tokens - } - - fn expand_closure<'b>( - &self, - ruleset: &'a RuleSet, - first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> HashSet> { - let af_la_tokens = if self.dot_pos + 1 < self.rule.rhs.len() { - HashSet::from_iter( - first_set - .get(&self.rule.rhs[self.dot_pos + 1]) - .unwrap() - .clone(), - ) - } else { - self.la_tokens.clone() - }; - - if self.dot_pos < self.rule.rhs.len() - && matches!(self.rule.rhs[self.dot_pos], RuleElem::NonTerm(_)) - { - ruleset - .find_rule(&self.rule.rhs[self.dot_pos]) - .into_iter() - .map(|rule| LRItem::<'_, _>::new(rule, af_la_tokens.clone())) - .collect() - } else { - HashSet::new() - } - } - - #[allow(clippy::int_plus_one)] - fn next_dot(&self) -> Option<(&'a RuleElem, LRItem<'a, T>)> { - if self.dot_pos + 1 <= self.rule.rhs.len() { - let bef_token = &self.rule.rhs[self.dot_pos]; - let item = LRItem { - rule: self.rule, - dot_pos: self.dot_pos + 1, - la_tokens: self.la_tokens.clone(), - }; - Some((bef_token, item)) - } else { - None - } - } - - fn unify(&mut self, other: LRItem<'a, T>) { - if self != &other { - return; - } - other.la_tokens.into_iter().for_each(|la_token| { - if !self.la_tokens.contains(&la_token) { - self.la_tokens.insert(la_token); - } - }); - } - - fn unify_all(mut items: Vec>) -> Vec> { - for idx in (0..items.len()).permutations(2) { - let (a_idx, b_idx) = (idx[0], idx[1]); - let tmp = items[b_idx].clone(); - items[a_idx].unify(tmp); - } - items - } - - fn 
unity_set( - items_a: HashSet>, - items_b: HashSet>, - ) -> HashSet> { - let mut items_a = Vec::from_iter(items_a); - let items_b = Vec::from_iter(items_b); - items_a.extend(items_b); - HashSet::from_iter(Self::unify_all(items_a)) - } -} diff --git a/crates/parse_lr1/src/error.rs b/crates/parse_lr1/src/error.rs deleted file mode 100644 index 4cbb467..0000000 --- a/crates/parse_lr1/src/error.rs +++ /dev/null @@ -1,23 +0,0 @@ -use thiserror::Error; - -use copager_core::error::ParseError as SuperParseError; -use copager_cfg::token::{TokenTag, Token}; - -#[derive(Debug, Error)] -pub enum ParseError { - #[error("Unexpected token {actual:?} found")] - UnexpectedToken { - actual: String, - }, - #[error("Unexpected EOF")] - UnexpectedEOF, -} - -impl ParseError { - pub fn new_unexpected_token(expected: Token) -> SuperParseError { - let err = ParseError::UnexpectedToken { - actual: format!("{:?}", expected.kind), - }; - SuperParseError::from(err).with(expected) - } -} diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs deleted file mode 100644 index 0e1a754..0000000 --- a/crates/parse_lr1/src/lib.rs +++ /dev/null @@ -1,98 +0,0 @@ -#![feature(gen_blocks)] - -mod error; -mod builder; - -use std::collections::HashMap; - -use serde::{Serialize, Deserialize}; - -use copager_cfg::token::Token; -use copager_lex::LexSource; -use copager_parse::{ParseSource, ParseDriver, ParseEvent}; -use copager_utils::cache::Cacheable; - -use builder::{LR1Configure, LRAction}; -use error::ParseError; - -#[derive(Debug)] -pub struct LR1 -where - Sl: LexSource, - Sp: ParseSource, -{ - tables: LR1Configure, -} - -impl Cacheable<(Sl, Sp)> for LR1 -where - Sl: LexSource, - Sl::Tag: Serialize + for<'de> Deserialize<'de>, - Sp: ParseSource, - Sp::Tag: Serialize + for<'de> Deserialize<'de>, -{ - type Cache = LR1Configure; - - fn new((source_l, source_p): (Sl, Sp)) -> anyhow::Result { - Ok(LR1Configure::new(&source_l, &source_p)?) 
- } - - fn restore(tables: Self::Cache) -> Self { - LR1 { tables } - } -} - -impl ParseDriver for LR1 -where - Sl: LexSource, - Sp: ParseSource, -{ - fn try_from((source_l, source_p): (Sl, Sp)) -> anyhow::Result { - let tables = LR1Configure::new(&source_l, &source_p)?; - Ok(LR1 { tables }) - } - - gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> - where - Il: Iterator>, - { - let mut stack = vec![0]; - loop { - let token = lexer.next(); - loop { - let top = stack[stack.len() - 1]; - let action = match token { - Some(token) => { - let local_action_table: &HashMap<_, _> = &self.tables.action_table[top]; - (local_action_table.get(&token.kind).unwrap(), Some(token)) - }, - None => (&self.tables.eof_action_table[top], None), - }; - match action { - (LRAction::Shift(new_state), Some(token)) => { - stack.push(*new_state); - yield ParseEvent::Read(token); - break; - } - (LRAction::Reduce(tag, goto, elems_cnt), _) => { - stack.truncate(stack.len() - elems_cnt); - stack.push(self.tables.goto_table[stack[stack.len() - 1]][*goto]); - yield ParseEvent::Parse { rule: *tag, len: *elems_cnt }; - } - (LRAction::Accept, _) => { - return; - } - (LRAction::None, Some(token)) => { - yield ParseEvent::Err(ParseError::new_unexpected_token(token).into()); - return; - } - (LRAction::None, None) => { - yield ParseEvent::Err(ParseError::UnexpectedEOF.into()); - return; - } - _ => unreachable!(), - } - } - } - } -} diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs deleted file mode 100644 index 1acd706..0000000 --- a/crates/parse_lr1/tests/simple.rs +++ /dev/null @@ -1,105 +0,0 @@ -use serde::{Serialize, Deserialize}; - -use copager_cfg::token::TokenTag; -use copager_cfg::rule::{RuleTag, Rule, RuleElem}; -use copager_lex::{LexSource, LexDriver}; -use copager_lex_regex::RegexLexer; -use copager_parse::{ParseSource, ParseDriver, ParseEvent}; -use copager_parse_lr1::LR1; - -#[derive( - Debug, Default, Copy, Clone, Hash, PartialEq, Eq, - LexSource, Serialize, Deserialize -)] -enum ExprToken { - #[default] - #[token(text = r"\+")] - Plus, - #[token(text = r"-")] - Minus, - #[token(text = r"\*")] - Mul, - #[token(text = r"/")] - Div, - #[token(text = r"\(")] - BracketL, - #[token(text = r"\)")] - BracketR, - #[token(text = r"[1-9][0-9]*")] - Num, - #[token(text = r"[ \t\n]+", ignored)] - _Whitespace, -} - -#[derive( - Debug, Default, Copy, Clone, Hash, PartialEq, Eq, - ParseSource, Serialize, Deserialize -)] -enum ExprRule { - #[default] - #[rule(" ::= Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, -} - -type MyLexer = RegexLexer; -type MyParser = LR1; - -const OK_INPUTS: [&str; 10] = [ - "10", - "10 + 20", - "10 - 20", - "10 * 20", - "10 / 20", - "10 + 20 * 30 - 40", - "(10)", - "((((10))))", - "10 * (20 - 30)", - "((10 + 20) * (30 / 40)) - 50", -]; - -const ERR_INPUTS: [&str; 7] = [ - "()", - "(10 -", - "10 +", - "*", - "10 20 + 30", - "10 + 20 * 30 / 40 (", - "(((10))", -]; - -#[test] -fn simple_success() { - for input in &OK_INPUTS { - assert!(parse(input), "{}", input); - } -} - -#[test] -fn simple_failure() { - for input in &ERR_INPUTS { - assert!(!parse(input), "{}", input); - } -} - -fn parse<'input>(input: &'input str) -> bool { - let source = ExprToken::default(); - let lexer = >::try_from(source).unwrap(); - - let source = (ExprToken::default(), ExprRule::default()); - let parser = 
>::try_from(source).unwrap();
-
-    let mut parse_itr = parser.run(lexer.run(input));
-    let is_err = |state| matches!(state, ParseEvent::Err(_));
-    let err_happened = parse_itr.any(is_err);
-
-    !err_happened
-}
diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml
new file mode 100644
index 0000000..ecc8628
--- /dev/null
+++ b/crates/parse_lr_common/Cargo.toml
@@ -0,0 +1,15 @@
+cargo-features = ["edition2024"]
+
+[package]
+name = "copager_parse_lr_common"
+version = "0.2.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+copager_core = { path = "../core" }
+copager_cfg = { path = "../cfg" }
+copager_parse = { path = "../parse" }
+copager_parse_common = { path = "../parse_common" }
diff --git a/crates/parse_lr_common/src/automaton.rs b/crates/parse_lr_common/src/automaton.rs
new file mode 100644
index 0000000..f05d586
--- /dev/null
+++ b/crates/parse_lr_common/src/automaton.rs
@@ -0,0 +1,10 @@
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::RuleElem;
+
+pub mod lr0;
+pub mod lr1;
+
+pub trait Automaton<'a: 'b, 'b, T: TokenTag + 'a> {
+    fn len(&self) -> usize;
+    fn edges(&'b self) -> impl Iterator<Item = &'b (usize, usize, &'a RuleElem<T>)>;
+}
diff --git a/crates/parse_lr_common/src/automaton/lr0.rs b/crates/parse_lr_common/src/automaton/lr0.rs
new file mode 100644
index 0000000..dbf522f
--- /dev/null
+++ b/crates/parse_lr_common/src/automaton/lr0.rs
@@ -0,0 +1,4 @@
+pub mod item;
+pub mod dfa;
+
+pub use dfa::LR0DFA;
diff --git a/crates/parse_lr_common/src/automaton/lr0/dfa.rs b/crates/parse_lr_common/src/automaton/lr0/dfa.rs
new file mode 100644
index 0000000..1c0b945
--- /dev/null
+++ b/crates/parse_lr_common/src/automaton/lr0/dfa.rs
@@ -0,0 +1,224 @@
+use std::collections::{HashMap, BTreeMap};
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::rc::Rc;
+use std::sync::RwLock;
+use std::marker::PhantomData;
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
+
+use crate::automaton::Automaton;
+use crate::lr0::item::{LR0Item, LR0ItemSet};
+
+#[derive(Clone)]
+pub struct LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub id: usize,
+    pub itemset: LR0ItemSet<'a, T, R>,
+    pub next: Vec<(&'a RuleElem<T>, Rc<RwLock<LR0DFANode<'a, T, R>>>)>, // (cond, next_node)
+}
+
+impl<'a, T, R> Debug for LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        #[derive(Debug)]
+        #[allow(dead_code)]
+        struct LR0DFANode<'a, 'b, T, R>
+        where
+            T: TokenTag,
+            R: RuleTag<T>,
+        {
+            id: usize,
+            itemset: &'b LR0ItemSet<'a, T, R>,
+            next: Vec<(&'a RuleElem<T>, usize)>,
+        }
+
+        let id = self.id;
+        let itemset = &self.itemset;
+        let next = self.next
+            .iter()
+            .map(|(cond, next_node)| (*cond, next_node.read().unwrap().id))
+            .collect::<Vec<_>>();
+
+        if f.alternate() {
+            return write!(f, "{:#?}", LR0DFANode { id, itemset, next });
+        } else {
+            write!(f, "{:?}", LR0DFANode { id, itemset, next })
+        }
+    }
+}
+
+impl<'a, T, R> Hash for LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.id.hash(state);
+        self.itemset.hash(state);
+    }
+}
+
+impl<'a, T, R> PartialEq for LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn eq(&self, other: &Self) -> bool {
+        self.id == other.id && self.itemset == other.itemset
+    }
+}
+
+impl<'a, T, R> Eq for LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{}
+
+impl<'a, T, R> LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn find_all(&self, rule: &Rule<T, R>) -> impl Iterator<Item = &'a Rule<T, R>> {
+        self.find_all_by(move |item| item.rule == rule)
+    }
+
+    pub fn find_all_by<F>(&self, cond: F) -> impl Iterator<Item = &'a Rule<T, R>>
+    where
+        F: Fn(&&LR0Item<'a, T, R>) -> bool
+    {
+        self.itemset
+            .items
+            .iter()
+            .filter(cond)
+            .map(|item| item.rule)
+    }
+}
+
+#[derive(Debug)]
+pub struct LR0DFA<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub nodes: Vec<Rc<RwLock<LR0DFANode<'a, T, R>>>>,
+    pub edges: Vec<(usize, usize, &'a RuleElem<T>)>,
+}
+
+impl<'a, T, R> From<&'a RuleSet<T, R>> for LR0DFA<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(ruleset: &'a RuleSet<T, R>) -> Self {
+        let dfa_top = LR0DFABuilder::new().start(ruleset);
+
+        let mut nodes = BTreeMap::new();
+        let mut edges = vec![];
+        let mut stack = vec![dfa_top];
+        while let Some(node) = stack.pop() {
+            let from = node.read().unwrap().id;
+            if nodes.contains_key(&from) {
+                continue;
+            }
+            for (cond, next_node) in &node.read().unwrap().next {
+                let to = next_node.read().unwrap().id;
+                edges.push((from, to, *cond));
+                stack.push(Rc::clone(next_node));
+            }
+            nodes.insert(from, Rc::clone(&node));
+        }
+
+        let nodes = nodes
+            .into_iter()
+            .map(|(_, node)| node)
+            .collect();
+
+        LR0DFA { nodes, edges }
+    }
+}
+
+impl<'a: 'b, 'b, T, R> Automaton<'a, 'b, T> for LR0DFA<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn len(&self) -> usize {
+        self.nodes.len()
+    }
+
+    fn edges(&'b self) -> impl Iterator<Item = &'b (usize, usize, &'a RuleElem<T>)> {
+        self.edges.iter()
+    }
+}
+
+#[derive(Debug)]
+struct LR0DFABuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    itemsets: HashMap<LR0ItemSet<'a, T, R>, Rc<RwLock<LR0DFANode<'a, T, R>>>>,
+    _phantom_t: PhantomData<T>,
+    _phantom_r: PhantomData<R>,
+}
+
+impl<'a, T, R> LR0DFABuilder<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn new() -> Self {
+        LR0DFABuilder {
+            itemsets: HashMap::new(),
+            _phantom_t: PhantomData,
+            _phantom_r: PhantomData,
+        }
+    }
+
+    fn start(mut self, ruleset: &'a RuleSet<T, R>) -> Rc<RwLock<LR0DFANode<'a, T, R>>> {
+        let top = RuleElem::NonTerm(ruleset.top.clone());
+        let top = ruleset.rules
+            .iter()
+            .find(|rule| rule.lhs == top)
+            .unwrap();
+        let top = LR0ItemSet::from(ruleset).init(top);
+
+        self.gen_recursive(top)
+    }
+
+    fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T, R>) -> Rc<RwLock<LR0DFANode<'a, T, R>>>
+    where
+        T: TokenTag,
+    {
+        if let Some(node) = self.itemsets.get(&itemset) {
+            return Rc::clone(node);
+        }
+
+        let id = self.itemsets.len();
+        let node = LR0DFANode { id, itemset: itemset.clone(), next: vec![] };
+        let node = Rc::new(RwLock::new(node));
+        self.itemsets.insert(itemset.clone(), Rc::clone(&node));
+
+        let mut next = vec![];
+        for (cond, nextset) in itemset.gen_next_sets() {
+            next.push((cond, self.gen_recursive(nextset)));
+        }
+        node.write().unwrap().next = next;
+
+        Rc::clone(&node)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}
diff --git a/crates/parse_lr_common/src/automaton/lr0/item.rs b/crates/parse_lr_common/src/automaton/lr0/item.rs
new file mode 100644
index 0000000..1cf6449
--- /dev/null
+++ b/crates/parse_lr_common/src/automaton/lr0/item.rs
@@ -0,0 +1,213 @@
+use std::collections::{HashMap, HashSet};
+use std::fmt::{Display, Debug};
+use std::hash::Hash;
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
+
+#[derive(Clone, Copy, Hash, PartialEq, Eq)]
+pub struct LR0Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub rule: &'a Rule<T, R>,
+    pub dot_pos: usize,
+}
+
+impl<'a, T, R> Display for LR0Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} -> ", self.rule.lhs)?;
+        for (i, elem) in self.rule.rhs.iter().enumerate() {
+            if i == self.dot_pos {
+                write!(f, "• ")?;
+            }
+            write!(f, "{} ", elem)?;
+        }
+        if self.dot_pos == self.rule.rhs.len() {
+            write!(f, "•")?;
+        }
+        write!(f, "")
+    }
+}
+
+impl<'a, T, R> Debug for LR0Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self)
+    }
+}
+
+impl<'a, T, R> From<&'a Rule<T, R>> for LR0Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(rule: &'a Rule<T, R>) -> Self {
+        if rule.rhs[0] == RuleElem::Epsilon {
+            LR0Item { rule, dot_pos: 1 }
+        } else {
+            LR0Item { rule, dot_pos: 0 }
+        }
+    }
+}
+
+impl<'a, T, R> LR0Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn gen_next(&self) -> Self {
+        assert!(self.dot_pos + 1 <= self.rule.rhs.len());
+        LR0Item {
+            rule: self.rule,
+            dot_pos: self.dot_pos + 1,
+        }
+    }
+
+    pub fn check_next_elem(&self) -> Option<&'a RuleElem<T>> {
+        if self.dot_pos < self.rule.rhs.len() {
+            Some(&self.rule.rhs[self.dot_pos])
+        } else {
+            None
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct LR0ItemSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub items: Vec<LR0Item<'a, T, R>>,
+    ruleset: &'a RuleSet<T, R>,
+}
+
+impl<'a, T, R> Debug for LR0ItemSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if f.alternate() {
+            write!(f, "{:#?}", self.items)
+        } else {
+            write!(f, "{:?}", self.items)
+        }
+    }
+}
+
+impl<'a, T, R> From<&'a RuleSet<T, R>> for LR0ItemSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(ruleset: &'a RuleSet<T, R>) -> Self {
+        LR0ItemSet {
+            items: vec![],
+            ruleset,
+        }
+    }
+}
+
+impl<'a, T, R> Hash for LR0ItemSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.items.hash(state);
+    }
+}
+
+impl<'a, T, R> PartialEq for LR0ItemSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn eq(&self, other: &Self) -> bool {
+        self.items == other.items
+    }
+}
+
+impl<'a, T, R> Eq for LR0ItemSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{}
+
+impl<'a, T, R> LR0ItemSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn init(mut self, rule: &'a Rule<T, R>) -> Self {
+        self.items = vec![LR0Item::from(rule)];
+        self
+    }
+
+    pub fn gen_next_sets(&mut self) -> impl Iterator<Item = (&'a RuleElem<T>, LR0ItemSet<'a, T, R>)> {
+        self.expand();
+
+        let mut next_set_candidates = HashMap::new();
+        self.items
+            .iter()
+            .filter_map(|item| item.check_next_elem().map(|nelem| (nelem, item)))
+            .for_each(|(nelem, item)| {
+                next_set_candidates
+                    .entry(nelem)
+                    .or_insert_with(HashSet::new)
+                    .insert(item.gen_next());
+            });
+
+        next_set_candidates
+            .into_iter()
+            .map(|(cond, items)| {
+                let items = items.into_iter().collect();
+                (cond, LR0ItemSet { items, ruleset: self.ruleset })
+            })
+    }
+
+    fn expand(&mut self) {
+        let mut modified = true;
+        while modified {
+            modified = false;
+            let new_expanded = self.items
+                .iter()
+                .flat_map(|item| self.expand_once(item))
+                .flatten()
+                .collect::<Vec<_>>();
+            for item in new_expanded {
+                if self.items.contains(&item) {
+                    continue;
+                }
+                self.items.push(item);
+                modified = true;
+            }
+        }
+    }
+
+    fn expand_once(&self, item: &LR0Item<'a, T, R>) -> Option<impl Iterator<Item = LR0Item<'a, T, R>>> {
+        if let Some(nonterm@RuleElem::NonTerm(..)) = item.check_next_elem() {
+            Some(self.ruleset
+                .find_rule(nonterm)
+                .into_iter()
+                .map(|rule| LR0Item::from(rule)))
+        } else {
+            None
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}
diff --git a/crates/parse_lr_common/src/automaton/lr1.rs b/crates/parse_lr_common/src/automaton/lr1.rs
new file mode 100644
index 0000000..6e822ac
--- /dev/null
+++ b/crates/parse_lr_common/src/automaton/lr1.rs
@@ -0,0 +1,4 @@
+pub mod item;
+pub mod dfa;
+
+pub use dfa::LR1DFA;
diff --git a/crates/parse_lr_common/src/automaton/lr1/dfa.rs b/crates/parse_lr_common/src/automaton/lr1/dfa.rs
new file mode 100644
index 0000000..4a0d157
--- /dev/null
+++ b/crates/parse_lr_common/src/automaton/lr1/dfa.rs
@@ -0,0 +1,225 @@
+use std::collections::{HashMap, BTreeMap};
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::rc::Rc;
+use std::sync::RwLock;
+use std::marker::PhantomData;
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
+use copager_parse_common::rule::FirstSet;
+
+use crate::automaton::Automaton;
+use crate::lr1::item::{LR1Item, LR1ItemSet};
+
+#[derive(Clone)]
+pub struct LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub id: usize,
+    pub itemset: LR1ItemSet<'a, 'b, T, R>,
+    pub next: Vec<(&'a RuleElem<T>, Rc<RwLock<LR1DFANode<'a, 'b, T, R>>>)>, // (cond, next_node)
+}
+
+impl<'a, 'b, T, R> Debug for LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        #[derive(Debug)]
+        #[allow(dead_code)]
+        struct LR1DFANode<'a, 'b, 'c, T, R>
+        where
+            T: TokenTag,
+            R: RuleTag<T>,
+        {
+            id: usize,
+            itemset: &'c LR1ItemSet<'a, 'b, T, R>,
+            next: Vec<(&'a RuleElem<T>, usize)>,
+        }
+
+        let id = self.id;
+        let itemset = &self.itemset;
+        let next = self.next
+            .iter()
+            .map(|(cond, next_node)| (*cond, next_node.read().unwrap().id))
+            .collect::<Vec<_>>();
+
+        if f.alternate() {
+            return write!(f, "{:#?}", LR1DFANode { id, itemset, next });
+        } else {
+            write!(f, "{:?}", LR1DFANode { id, itemset, next })
+        }
+    }
+}
+
+impl<'a, 'b, T, R> Hash for LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.id.hash(state);
+        self.itemset.hash(state);
+    }
+}
+
+impl<'a, 'b, T, R> PartialEq for LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn eq(&self, other: &Self) -> bool {
+        self.id == other.id && self.itemset == other.itemset
+    }
+}
+
+impl<'a, 'b, T, R> Eq for LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{}
+
+impl<'a, 'b, T, R> LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn find_all(&self, rule: &Rule<T, R>) -> impl Iterator<Item = (&'a Rule<T, R>, &'a RuleElem<T>)> {
+        self.find_all_by(move |item| item.rule == rule)
+    }
+
+    pub fn find_all_by<F>(&self, cond: F) -> impl Iterator<Item = (&'a Rule<T, R>, &'a RuleElem<T>)>
+    where
+        F: Fn(&&LR1Item<'a, T, R>) -> bool
+    {
+        self.itemset
+            .items
+            .iter()
+            .filter(cond)
+            .map(|item| (item.rule, item.la_token))
+    }
+}
+
+#[derive(Debug)]
+pub struct LR1DFA<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub nodes: Vec<Rc<RwLock<LR1DFANode<'a, 'b, T, R>>>>,
+    pub edges: Vec<(usize, usize, &'a RuleElem<T>)>,
+}
+
+impl<'a, 'b, T, R> From<(&'a RuleSet<T, R>, &'b FirstSet<'a, T, R>)> for LR1DFA<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from((ruleset, first_set): (&'a RuleSet<T, R>, &'b FirstSet<'a, T, R>)) -> Self {
+        let dfa_top = LR1DFABuilder::new().start(ruleset, &first_set);
+
+        let mut nodes = BTreeMap::new();
+        let mut edges = vec![];
+        let mut stack = vec![dfa_top];
+        while let Some(node) = stack.pop() {
+            let from = node.read().unwrap().id;
+            if nodes.contains_key(&from) {
+                continue;
+            }
+            for (cond, next_node) in &node.read().unwrap().next {
+                let to = next_node.read().unwrap().id;
+                edges.push((from, to, *cond));
+                stack.push(Rc::clone(next_node));
+            }
+            nodes.insert(from, Rc::clone(&node));
+        }
+
+        let nodes = nodes
+            .into_iter()
+            .map(|(_, node)| node)
+            .collect();
+
+        LR1DFA { nodes, edges }
+    }
+}
+
+impl<'a: 'b, 'b, T, R> Automaton<'a, 'b, T> for LR1DFA<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn len(&self) -> usize {
+        self.nodes.len()
+    }
+
+    fn edges(&'b self) -> impl Iterator<Item = &'b (usize, usize, &'a RuleElem<T>)> {
+        self.edges.iter()
+    }
+}
+
+#[derive(Debug)]
+struct LR1DFABuilder<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    itemsets: HashMap<LR1ItemSet<'a, 'b, T, R>, Rc<RwLock<LR1DFANode<'a, 'b, T, R>>>>,
+    _phantom_t: PhantomData<T>,
+    _phantom_r: PhantomData<R>,
+}
+
+impl<'a, 'b, T, R> LR1DFABuilder<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn new() -> Self {
+        LR1DFABuilder {
+            itemsets: HashMap::new(),
+            _phantom_t: PhantomData,
+            _phantom_r: PhantomData,
+        }
+    }
+
+    fn start(mut self, ruleset: &'a RuleSet<T, R>, first_set: &'b FirstSet<'a, T, R>) -> Rc<RwLock<LR1DFANode<'a, 'b, T, R>>> {
+        let top = RuleElem::NonTerm(ruleset.top.clone());
+        let top = ruleset.rules
+            .iter()
+            .find(|rule| rule.lhs == top)
+            .unwrap();
+        let top = LR1ItemSet::from((ruleset, first_set)).init(top);
+
+        self.gen_recursive(top)
+    }
+
+    fn gen_recursive(&mut self, mut itemset: LR1ItemSet<'a, 'b, T, R>) -> Rc<RwLock<LR1DFANode<'a, 'b, T, R>>>
+    where
+        T: TokenTag,
+    {
+        if let Some(node) = self.itemsets.get(&itemset) {
+            return Rc::clone(node);
+        }
+
+        let id = self.itemsets.len();
+        let node = LR1DFANode { id, itemset: itemset.clone(), next: vec![] };
+        let node = Rc::new(RwLock::new(node));
+        self.itemsets.insert(itemset.clone(), Rc::clone(&node));
+
+        let mut next = vec![];
+        for (cond, nextset) in itemset.gen_next_sets() {
+            next.push((cond, self.gen_recursive(nextset)));
+        }
+        node.write().unwrap().next = next;
+
+        Rc::clone(&node)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}
diff --git a/crates/parse_lr_common/src/automaton/lr1/item.rs b/crates/parse_lr_common/src/automaton/lr1/item.rs
new file mode 100644
index 0000000..7c14247
--- /dev/null
+++ b/crates/parse_lr_common/src/automaton/lr1/item.rs
@@ -0,0 +1,230 @@
+use std::collections::{HashMap, HashSet};
+use std::fmt::{Display, Debug};
+use std::hash::Hash;
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
+use copager_parse_common::rule::FirstSet;
+
+#[derive(Clone, Hash, PartialEq, Eq)]
+pub struct LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub rule: &'a Rule<T, R>,
+    pub dot_pos: usize,
+    pub la_token: &'a RuleElem<T>,
+}
+
+impl<'a, T, R> Display for LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} -> ", self.rule.lhs)?;
+        for (i, elem) in self.rule.rhs.iter().enumerate() {
+            if i == self.dot_pos {
+                write!(f, "• ")?;
+            }
+            write!(f, "{} ", elem)?;
+        }
+        if self.dot_pos == self.rule.rhs.len() {
+            write!(f, "•")?;
+        }
+        write!(f, "[{}]", self.la_token)
+    }
+}
+
+impl<'a, T, R> Debug for LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self)
+    }
+}
+
+impl<'a, T, R> From<(&'a Rule<T, R>, &'a RuleElem<T>)> for LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from((rule, la_token): (&'a Rule<T, R>, &'a RuleElem<T>)) -> Self {
+        if rule.rhs[0] == RuleElem::Epsilon {
+            LR1Item { rule, dot_pos: 1, la_token: &RuleElem::EOF }
+        } else {
+            LR1Item { rule, dot_pos: 0, la_token }
+        }
+    }
+}
+
+impl<'a, T, R> LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn gen_next(&self) -> Self {
+        assert!(self.dot_pos + 1 <= self.rule.rhs.len());
+        LR1Item {
+            rule: self.rule,
+            dot_pos: self.dot_pos + 1,
+            la_token: self.la_token,
+        }
+    }
+
+    pub fn check_next_elem(&self) -> Option<&'a RuleElem<T>> {
+        if self.dot_pos < self.rule.rhs.len() {
+            Some(&self.rule.rhs[self.dot_pos])
+        } else {
+            None
+        }
+    }
+
+    pub fn check_next_elems<'b>(&'b self) -> Vec<RuleElem<T>> {
+        let mut next_elems = Vec::from(&self.rule.rhs[self.dot_pos..]);
+        next_elems.push(self.la_token.clone());
+        next_elems
+    }
+}
+
+#[derive(Clone)]
+pub struct LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub items: Vec<LR1Item<'a, T, R>>,
+    ruleset: &'a RuleSet<T, R>,
+    first_set: &'b FirstSet<'a, T, R>,
+}
+
+impl<'a, 'b, T, R> Debug for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if f.alternate() {
+            write!(f, "{:#?}", self.items)
+        } else {
+            write!(f, "{:?}", self.items)
+        }
+    }
+}
+
+impl<'a, 'b, T, R> From<(&'a RuleSet<T, R>, &'b FirstSet<'a, T, R>)> for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from((ruleset, first_set): (&'a RuleSet<T, R>, &'b FirstSet<'a, T, R>)) -> Self {
+        LR1ItemSet {
+            items: vec![],
+            ruleset,
+            first_set,
+        }
+    }
+}
+
+impl<'a, 'b, T, R> Hash for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.items.hash(state);
+    }
+}
+
+impl<'a, 'b, T, R> PartialEq for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn eq(&self, other: &Self) -> bool {
+        self.items == other.items
+    }
+}
+
+impl<'a, 'b, T, R> Eq for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{}
+
+impl<'a, 'b, T, R> LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn init(mut self, rule: &'a Rule<T, R>) -> Self {
+        self.items = vec![LR1Item::from((rule, &RuleElem::EOF))];
+        self
+    }
+
+    pub fn gen_next_sets(&mut self) -> impl Iterator<Item = (&'a RuleElem<T>, LR1ItemSet<'a, 'b, T, R>)> {
+        self.expand();
+
+        let mut next_set_candidates = HashMap::new();
+        self.items
+            .iter()
+            .filter_map(|item| item.check_next_elem().map(|nelem| (nelem, item)))
+            .for_each(|(nelem, item)| {
+                next_set_candidates
+                    .entry(nelem)
+                    .or_insert_with(HashSet::new)
+                    .insert(item.gen_next());
+            });
+
+        next_set_candidates
+            .into_iter()
+            .map(|(cond, items)| {
+                let items = items.into_iter().collect();
+                (cond, LR1ItemSet { items, ruleset: self.ruleset, first_set: self.first_set })
+            })
+    }
+
+    fn expand(&mut self) {
+        let mut modified = true;
+        while modified {
+            modified = false;
+            let new_expanded = self.items
+                .iter()
+                .flat_map(|item| self.expand_once(item))
+                .flatten()
+                .collect::<Vec<_>>();
+            for item in new_expanded {
+                if self.items.contains(&item) {
+                    continue;
+                }
+                self.items.push(item);
+                modified = true;
+            }
+        }
+    }
+
+    fn expand_once(&self, item: &LR1Item<'a, T, R>) -> Option<impl Iterator<Item = LR1Item<'a, T, R>>> {
+        if let Some(nonterm@RuleElem::NonTerm(..)) = item.check_next_elem() {
+            Some(self.ruleset
+                .find_rule(nonterm)
+                .into_iter()
+                .flat_map(|rule| {
+                    let next_elems = item.check_next_elems();
+                    self.first_set
+                        .get_by(&next_elems[1..])
+                        .into_iter()
+                        .map(move |la_token| LR1Item::from((rule, la_token)))
+                }))
+        } else {
+            None
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}
diff --git a/crates/parse_lr_common/src/driver.rs b/crates/parse_lr_common/src/driver.rs
new file mode 100644
index 0000000..48c8a1f
--- /dev/null
+++ b/crates/parse_lr_common/src/driver.rs
@@ -0,0 +1,87 @@
+use copager_cfg::token::{TokenTag, Token};
+use copager_cfg::rule::{RuleElem, RuleTag};
+use copager_parse::ParseEvent;
+
+use crate::error::LRError;
+use crate::table::{LRAction, LRTable};
+
+pub struct LRDriver<'table, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    table: &'table LRTable<T, R>,
+    stack: Vec<usize>,
+    accepted: bool,
+}
+
+impl<'table, T, R> From<&'table LRTable<T, R>> for LRDriver<'table, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from(table: &'table LRTable<T, R>) -> Self {
+        LRDriver {
+            table,
+            stack: vec![0],
+            accepted: false,
+        }
+    }
+}
+
+impl<'table, 'input, T, R> LRDriver<'table, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn reset(&mut self) {
+        self.stack = vec![0];
+    }
+
+    pub gen fn consume(&mut self, token: Option<Token<'input, T>>) -> ParseEvent<'input, T, R> {
+        loop {
+            let top = self.stack[self.stack.len() - 1];
+            let action = self.table.get_action(top, token);
+            match (action, token) {
+                (LRAction::Shift(new_state), Some(token)) => {
+                    self.stack.push(*new_state);
+                    yield ParseEvent::Read(token);
+                    break;
+                },
+                (LRAction::Reduce(rule), _) => {
+                    let tag = rule.tag.unwrap();
+                    let lhs = lhs_as_str(&rule.lhs);
+                    let rhs_len = rule.rhs.len();
+                    self.stack.truncate(self.stack.len() - rhs_len);
+                    self.stack.push(self.table.get_goto(self.stack[self.stack.len()-1], lhs).unwrap());
+                    yield ParseEvent::Parse { rule: tag, len: rhs_len };
+                },
+                (LRAction::Accept, _) => {
+                    self.accepted = true;
+                    return;
+                }
+                (LRAction::None, Some(token)) => {
+                    yield ParseEvent::Err(LRError::new_unexpected_token(token).into());
+                    return;
+                }
+                (LRAction::None, None) => {
+                    yield ParseEvent::Err(LRError::new_unexpected_eof().into());
+                    return;
+                }
+                _ => unreachable!(),
+            }
+        }
+    }
+
+    pub fn accepted(&self) -> bool {
+        self.accepted
+    }
+}
+
+fn lhs_as_str<T: TokenTag>(lhs: &RuleElem<T>) -> &str {
+    if let RuleElem::NonTerm(nt) = lhs {
+        nt.as_str()
+    } else {
+        unreachable!()
+    }
+}
diff --git a/crates/parse_lr_common/src/error.rs b/crates/parse_lr_common/src/error.rs
new file mode 100644
index 0000000..ec8f0ba
--- /dev/null
+++ b/crates/parse_lr_common/src/error.rs
@@ -0,0 +1,48 @@
+use thiserror::Error;
+
+use copager_core::error::ParseError;
+use copager_cfg::token::{TokenTag, Token};
+use copager_cfg::rule::RuleTag;
+
+use crate::table::LRAction;
+
+#[derive(Debug, Error)]
+pub enum LRError {
+    #[error("Conflict occurred at [{action}]")]
+    Conflict {
+        action: String,
+    },
+    #[error("Unexpected token {actual:?} found")]
+    UnexpectedToken {
+        actual: String,
+    },
+    #[error("Unexpected EOF")]
+    UnexpectedEOF,
+}
+
+impl LRError {
+    pub fn new_conflict<T, R>(action: &LRAction<T, R>) -> ParseError
+    where
+        T: TokenTag,
+        R: RuleTag<T>,
+    {
+        let action = match action {
+            LRAction::Shift(state) => format!("Shift({})", state),
+            LRAction::Reduce(rule) => format!("Reduce({})", rule),
+            LRAction::Accept => "Accept".to_string(),
+            _ => unimplemented!(),
+        };
+        ParseError::from(LRError::Conflict { action })
+    }
+
+    pub fn new_unexpected_token<'input, T: TokenTag>(expected: Token<'input, T>) -> ParseError {
+        let err = LRError::UnexpectedToken {
+            actual: format!("{:?}", expected.kind),
+        };
+        ParseError::from(err).with(expected)
+    }
+
+    pub fn new_unexpected_eof() -> ParseError {
+        ParseError::from(LRError::UnexpectedEOF)
+    }
+}
diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs
new file mode 100644
index 0000000..36137d4
--- /dev/null
+++ b/crates/parse_lr_common/src/lib.rs
@@ -0,0 +1,14 @@
+#![feature(gen_blocks)]
+
+mod automaton;
+mod error;
+mod driver;
+mod table;
+
+// Common LR building blocks
+pub use table::{LRAction, LRTable, LRTableBuilder};
+pub use driver::LRDriver;
+
+// LR automata
+pub use automaton::lr0;
+pub use automaton::lr1;
diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs
new file mode 100644
index 0000000..bc1be23
--- /dev/null
+++ b/crates/parse_lr_common/src/table.rs
@@ -0,0 +1,136 @@
+use std::collections::HashMap;
+
+use serde::{Serialize, Deserialize};
+
+use copager_cfg::token::{Token, TokenTag};
+use copager_cfg::rule::{Rule, RuleElem, RuleTag};
+
+use crate::automaton::Automaton;
+use crate::error::LRError;
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum LRAction<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    Shift(usize),
+    Reduce(Rule<T, R>),
+    Accept,
+    None,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LRTable<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub action_table: Vec<HashMap<T, LRAction<T, R>>>,
+    pub eof_action_table: Vec<LRAction<T, R>>,
+    pub goto_table: Vec<HashMap<String, usize>>,
+}
+
+impl<T, R> LRTable<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn get_action(&self, state: usize, token: Option<Token<'_, T>>) -> &LRAction<T, R> {
+        if let Some(token) = token {
+            self.action_table[state].get(&token.kind).unwrap_or(&LRAction::None)
+        } else {
+            &self.eof_action_table[state]
+        }
+    }
+
+    pub fn get_goto(&self, state: usize, nonterm: &str) -> Option<usize> {
+        self.goto_table[state].get(nonterm).copied()
+    }
+}
+
+#[derive(Debug)]
+pub struct LRTableBuilder<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub action_table: Vec<HashMap<T, LRAction<T, R>>>,
+    pub eof_action_table: Vec<LRAction<T, R>>,
+    pub goto_table: Vec<HashMap<String, usize>>,
+}
+
+impl<'a: 'b, 'b, T, R> LRTableBuilder<T, R>
+where
+    T: TokenTag + 'a,
+    R: RuleTag<T>,
+{
+    pub fn from(automaton: &'b impl Automaton<'a, 'b, T>) -> Self {
+        let size = automaton.len();
+
+        // Initialize the tables
+        let mut action_table: Vec<HashMap<T, LRAction<T, R>>> = Vec::with_capacity(size);
+        let mut eof_action_table = Vec::with_capacity(size);
+        let mut goto_table = Vec::with_capacity(size);
+        for _ in 0..size {
+            action_table.push(HashMap::new());
+            eof_action_table.push(LRAction::None);
+            goto_table.push(HashMap::new());
+        }
+
+        // Fill in the tables from the automaton's edges
+        for (from, to, elem) in automaton.edges() {
+            match elem {
+                RuleElem::Term(token) => {
+                    action_table[*from].insert(*token, LRAction::Shift(*to));
+                }
+                RuleElem::NonTerm(name) => {
+                    goto_table[*from].insert(name.clone(), *to);
+                },
+                _ => {}
+            }
+        }
+
+        LRTableBuilder {
+            action_table,
+            eof_action_table,
+            goto_table,
+        }
+    }
+
+    pub fn set(&mut self, state: usize, token: Option<T>, action: LRAction<T, R>) {
+        if let Some(token) = token {
+            self.action_table[state].insert(token, action);
+        } else {
+            self.eof_action_table[state] = action;
+        }
+    }
+
+    pub fn try_set(&mut self, state: usize, token: Option<T>, action: LRAction<T, R>) -> anyhow::Result<()> {
+        if let Some(token) = token {
+            if self.action_table[state].contains_key(&token) {
+                return Err(LRError::new_conflict(&action).into());
+            }
+            self.action_table[state].insert(token, action);
+        } else {
+            if self.eof_action_table[state] != LRAction::None {
+                return Err(LRError::new_conflict(&action).into());
+            }
+            self.eof_action_table[state] = action;
+        }
+        Ok(())
+    }
+
+    pub fn build(self) -> LRTable<T, R> {
+        LRTable {
+            action_table: self.action_table,
+            eof_action_table: self.eof_action_table,
+            goto_table: self.goto_table,
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}
diff --git a/crates/parse_lr1/Cargo.toml b/crates/parse_lr_lr0/Cargo.toml
similarity index 67%
rename from crates/parse_lr1/Cargo.toml
rename to crates/parse_lr_lr0/Cargo.toml
index 3d628d4..4279751 100644
--- a/crates/parse_lr1/Cargo.toml
+++ b/crates/parse_lr_lr0/Cargo.toml
@@ -1,23 +1,23 @@
 cargo-features = ["edition2024"]
 
 [package]
-name = "copager_parse_lr1"
+name = "copager_parse_lr_lr0"
 version = "0.2.0"
 edition = "2024"
 
 [dependencies]
 anyhow = { workspace = true }
 thiserror = { workspace = true }
-serde = { workspace = true }
-itertools = "0.12.1"
-copager_core = { path = "../core" }
+serde = { workspace = true, features = ["derive"] }
features = ["derive"] } copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } +copager_parse_lr_common = { path = "../parse_lr_common" } copager_utils = { path = "../utils" } [dev-dependencies] +copager_core = { path = "../core" } copager_lex = { path = "../lex", features = ["derive"] } -copager_lex_regex = { path = "../lex_regex" } +copager_lex_regex = { path = "../lex_regex" } copager_parse = { path = "../parse", features = ["derive"] } -copager_parse_lr1 = { path = "../parse_lr1" } +copager_ir_void = { path = "../ir_void" } diff --git a/crates/parse_lr_lr0/src/lib.rs b/crates/parse_lr_lr0/src/lib.rs new file mode 100644 index 0000000..7330f0b --- /dev/null +++ b/crates/parse_lr_lr0/src/lib.rs @@ -0,0 +1,128 @@ +#![feature(gen_blocks)] + +use std::marker::PhantomData; + +use serde::{Serialize, Deserialize}; + +use copager_cfg::token::{Token, TokenTag}; +use copager_cfg::rule::{Rule, RuleElem, RuleTag}; +use copager_lex::LexSource; +use copager_parse::{ParseDriver, ParseSource, ParseEvent}; +use copager_parse_lr_common::lr0::item::LR0Item; +use copager_parse_lr_common::lr0::LR0DFA; +use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder}; +use copager_utils::cache::Cacheable; + +pub struct LR0 +where + T: TokenTag, + R: RuleTag +{ + table: LRTable, +} + +impl ParseDriver for LR0 +where + Sl: LexSource, + Sp: ParseSource, +{ + fn try_from((source_l, source_p): (Sl, Sp)) -> anyhow::Result { + let table = LR0Table::try_from(source_l, source_p)?; + Ok(LR0 { table }) + } + + gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> + where + Il: Iterator>, + { + let mut driver = LRDriver::from(&self.table); + while !driver.accepted() { + for event in driver.consume(lexer.next()).collect::>() { + yield event; + } + } + } +} + +impl Cacheable<(Sl, Sp)> for LR0 +where + Sl: LexSource, + Sl::Tag: Serialize + for<'de> Deserialize<'de>, + Sp: ParseSource, + Sp::Tag: Serialize + for<'de> Deserialize<'de>, +{ + type Cache = LRTable; + + fn new((source_l, source_p): (Sl, Sp)) -> anyhow::Result { + let table = LR0Table::try_from(source_l, source_p)?; + Ok(table) + } + + fn restore(table: Self::Cache) -> Self { + LR0 { table } + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct LR0Table +where + T: TokenTag, + R: RuleTag +{ + _phantom_t: PhantomData, + _phantom_r: PhantomData, +} + +impl LR0Table +where + T: TokenTag, + R: RuleTag, +{ + fn try_from(source_l: Sl, source_p: Sp) -> anyhow::Result> + where + Sl: LexSource, + Sp: ParseSource, + { + // 最上位規則を追加して RuleSet を更新 + let mut ruleset = source_p.into_ruleset(); + let top_dummy = Rule::new( + None, + RuleElem::new_nonterm("__top_dummy"), + vec![RuleElem::new_nonterm(&ruleset.top)], + ); + ruleset.update_top(top_dummy.clone()); + + // LR(0) オートマトン作成 + let dfa = LR0DFA::from(&ruleset); + + // LR(0) 構文解析表作成 + let mut builder = LRTableBuilder::from(&dfa); + for node in dfa.nodes { + let node = node.read().unwrap(); + for rule in node.find_all_by(is_lr0_reduce_state) { + // S -> Top . を含む場合,EOF 列に対して Accept をマーク + if rule == &top_dummy { + builder.set(node.id, None, LRAction::Accept); + continue; + } + + // A -> α β . 
を含む場合 全列に Reduce をマーク + builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?; + for token in source_l.iter() { + builder.try_set(node.id, Some(token), LRAction::Reduce(rule.clone()))?; + } + } + } + let table = builder.build(); + + Ok(table) + } +} + +fn is_lr0_reduce_state(item: &&LR0Item) -> bool +where + T: TokenTag, + R: RuleTag, +{ + item.check_next_elem().is_none() +} diff --git a/crates/parse_lr_lr0/tests/simple.rs b/crates/parse_lr_lr0/tests/simple.rs new file mode 100644 index 0000000..1d78194 --- /dev/null +++ b/crates/parse_lr_lr0/tests/simple.rs @@ -0,0 +1,90 @@ +use copager_core::{Grammar, Processor}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::LexSource; +use copager_lex_regex::RegexLexer; +use copager_parse::ParseSource; +use copager_parse_lr_lr0::LR0; +use copager_ir_void::Void; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum TestToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum TestRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type TestGrammar = Grammar; +type TestLexer = RegexLexer; +type TestParser = LR0; +type TestProcessor = Processor; + +#[test] +fn simple_success() { + const OK_INPUTS: [&str; 8] = [ + "10", + "10 + 20", + "10 - 20", + "10 + 20 + 30", + "(10)", + "((((10))))", + "10 + (20 - 30)", + "(10 + 20) - 30", + ]; + + let processor = TestProcessor::new() + .build_lexer() + .unwrap() + .build_parser() + .unwrap(); + + for input in &OK_INPUTS { + println!("input: {}", input); + processor.process::(input).unwrap(); + } +} + +#[test] +fn simple_failure() { + const ERR_INPUTS: [&str; 7] = [ + "()", + "(10 -", + "10 +", + "+", + "10 20 + 30", + "10 + 20 - 30 (", + "(((10))", + ]; + + let processor = TestProcessor::new() + .build_lexer() + .unwrap() + .build_parser() + .unwrap(); + + for input in &ERR_INPUTS { + assert!(processor.process::(input).is_err(), "input: {}", input); + } +} diff --git a/crates/parse_lr_lr1/Cargo.toml b/crates/parse_lr_lr1/Cargo.toml new file mode 100644 index 0000000..287b880 --- /dev/null +++ b/crates/parse_lr_lr1/Cargo.toml @@ -0,0 +1,24 @@ +cargo-features = ["edition2024"] + +[package] +name = "copager_parse_lr_lr1" +version = "0.2.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +serde = { workspace = true, features = ["derive"] } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } +copager_parse_common = { path = "../parse_common" } +copager_parse_lr_common = { path = "../parse_lr_common" } +copager_utils = { path = "../utils" } + +[dev-dependencies] +copager_core = { path = "../core" } +copager_lex = { path = "../lex", features = ["derive"] } +copager_lex_regex = { path = "../lex_regex" } +copager_parse = { path = "../parse", features = ["derive"] } +copager_ir_void = { path = "../ir_void" } diff --git a/crates/parse_lr_lr1/src/lib.rs b/crates/parse_lr_lr1/src/lib.rs new file mode 100644 index 0000000..bdf78d8 --- /dev/null +++ b/crates/parse_lr_lr1/src/lib.rs @@ -0,0 +1,134 @@ +#![feature(gen_blocks)] + +use 
+use std::marker::PhantomData;
+
+use serde::{Serialize, Deserialize};
+
+use copager_cfg::token::{Token, TokenTag};
+use copager_cfg::rule::{Rule, RuleElem, RuleTag};
+use copager_lex::LexSource;
+use copager_parse::{ParseDriver, ParseSource, ParseEvent};
+use copager_parse_common::rule::FirstSet;
+use copager_parse_lr_common::lr1::item::LR1Item;
+use copager_parse_lr_common::lr1::LR1DFA;
+use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder};
+use copager_utils::cache::Cacheable;
+
+pub struct LR1<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>
+{
+    table: LRTable<T, R>,
+}
+
+impl<Sl, Sp> ParseDriver<Sl, Sp> for LR1<Sl::Tag, Sp::Tag>
+where
+    Sl: LexSource,
+    Sp: ParseSource<Sl::Tag>,
+{
+    fn try_from((_, source_p): (Sl, Sp)) -> anyhow::Result<Self> {
+        let table = LR1Table::try_from(source_p)?;
+        Ok(LR1 { table })
+    }
+
+    gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag>
+    where
+        Il: Iterator<Item = Token<'input, Sl::Tag>>,
+    {
+        let mut driver = LRDriver::from(&self.table);
+        while !driver.accepted() {
+            for event in driver.consume(lexer.next()).collect::<Vec<_>>() {
+                yield event;
+            }
+        }
+    }
+}
+
+impl<Sl, Sp> Cacheable<(Sl, Sp)> for LR1<Sl::Tag, Sp::Tag>
+where
+    Sl: LexSource,
+    Sl::Tag: Serialize + for<'de> Deserialize<'de>,
+    Sp: ParseSource<Sl::Tag>,
+    Sp::Tag: Serialize + for<'de> Deserialize<'de>,
+{
+    type Cache = LRTable<Sl::Tag, Sp::Tag>;
+
+    fn new((_, source_p): (Sl, Sp)) -> anyhow::Result<Self::Cache> {
+        let table = LR1Table::try_from(source_p)?;
+        Ok(table)
+    }
+
+    fn restore(table: Self::Cache) -> Self {
+        LR1 { table }
+    }
+}
+
+pub struct LR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>
+{
+    _phantom_t: PhantomData<T>,
+    _phantom_r: PhantomData<R>,
+}
+
+impl<T, R> LR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn try_from<Sp>(source_p: Sp) -> anyhow::Result<LRTable<T, R>>
+    where
+        Sp: ParseSource<T, Tag = R>,
+    {
+        // Add a dummy top-level rule and update the RuleSet
+        let mut ruleset = source_p.into_ruleset();
+        let top_dummy = Rule::new(
+            None,
+            RuleElem::new_nonterm("__top_dummy"),
+            vec![RuleElem::new_nonterm(&ruleset.top)],
+        );
+        ruleset.update_top(top_dummy.clone());
+
+        // Build the First sets
+        let first_set = FirstSet::from(&ruleset);
+
+        // Build the LR(1) automaton
+        let dfa = LR1DFA::from((&ruleset, &first_set));
+
+        // Build the LR(1) parse table
+        let mut builder = LRTableBuilder::from(&dfa);
+        for node in &dfa.nodes {
+            let node = node.read().unwrap();
+            for (rule, la_token) in node.find_all_by(is_lr1_reduce_state) {
+                // If the state contains "A -> α β . [la_token]", mark the la_token column as Reduce
+                match la_token {
+                    RuleElem::Term(term) => {
+                        builder.try_set(node.id, Some(*term), LRAction::Reduce(rule.clone()))?;
+                    }
+                    RuleElem::EOF => {
+                        builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?;
+                    }
+                    _ => {}
+                }
+
+                // If the state contains "S -> Top .", mark the EOF column as Accept
+                if rule == &top_dummy {
+                    builder.set(node.id, None, LRAction::Accept);
+                }
+            }
+        }
+        let table = builder.build();
+
+        Ok(table)
+    }
+}
+
+fn is_lr1_reduce_state<T, R>(item: &&LR1Item<T, R>) -> bool
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    item.check_next_elem().is_none()
+}
diff --git a/crates/parse_lr_lr1/tests/simple.rs b/crates/parse_lr_lr1/tests/simple.rs
new file mode 100644
index 0000000..4d94a5b
--- /dev/null
+++ b/crates/parse_lr_lr1/tests/simple.rs
@@ -0,0 +1,100 @@
+use copager_core::{Grammar, Processor};
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleTag, Rule, RuleElem};
+use copager_lex::LexSource;
+use copager_lex_regex::RegexLexer;
+use copager_parse::ParseSource;
+use copager_parse_lr_lr1::LR1;
+use copager_ir_void::Void;
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+enum TestToken {
+    #[default]
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\*")]
+    Mul,
+    #[token(text = r"/")]
+    Div,
+    #[token(text = r"\(")]
+    BracketL,
+    #[token(text = r"\)")]
+    BracketR,
+    #[token(text = r"[1-9][0-9]*")]
+    Num,
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+enum TestRule {
+    #[default]
+    #[rule("<expr> ::= <expr> Plus <term>")]
+    #[rule("<expr> ::= <expr> Minus <term>")]
+    #[rule("<expr> ::= <term>")]
+    Expr,
+    #[rule("<term> ::= <term> Mul <num>")]
+    #[rule("<term> ::= <term> Div <num>")]
+    #[rule("<term> ::= <num>")]
+    Term,
+    #[rule("<num> ::= BracketL <expr> BracketR")]
+    #[rule("<num> ::= Num")]
+    Num,
+}
+
+type TestGrammar = Grammar<TestToken, TestRule>;
+type TestLexer = RegexLexer<TestToken>;
+type TestParser = LR1<TestToken, TestRule>;
+type TestProcessor = Processor<TestGrammar, TestLexer, TestParser>;
+
+#[test]
+fn simple_success() {
+    const OK_INPUTS: [&str; 10] = [
+        "10",
+        "10 + 20",
+        "10 - 20",
+        "10 * 20",
+        "10 / 20",
+        "10 + 20 * 30 - 40",
+        "(10)",
+        "((((10))))",
+        "10 * (20 - 30)",
+        "((10 + 20) * (30 / 40)) - 50",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &OK_INPUTS {
+        println!("input: {}", input);
+        processor.process::<Void>(input).unwrap();
+    }
+}
+
+#[test]
+fn simple_failure() {
+    const ERR_INPUTS: [&str; 7] = [
+        "()",
+        "(10 -",
+        "10 +",
+        "*",
+        "10 20 + 30",
+        "10 + 20 * 30 / 40 (",
+        "(((10))",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &ERR_INPUTS {
+        assert!(processor.process::<Void>(input).is_err(), "input: {}", input);
+    }
+}
diff --git a/crates/parse_lr_slr1/Cargo.toml b/crates/parse_lr_slr1/Cargo.toml
new file mode 100644
index 0000000..e6a1254
--- /dev/null
+++ b/crates/parse_lr_slr1/Cargo.toml
@@ -0,0 +1,24 @@
+cargo-features = ["edition2024"]
+
+[package]
+name = "copager_parse_lr_slr1"
+version = "0.2.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+copager_cfg = { path = "../cfg" }
+copager_lex = { path = "../lex" }
+copager_parse = { path = "../parse" }
+copager_parse_common = { path = "../parse_common" }
+copager_parse_lr_common = { path = "../parse_lr_common" }
+copager_utils = { path = "../utils" }
+
+[dev-dependencies]
+copager_core = { path = "../core" }
+copager_lex = { path = "../lex", features = ["derive"] }
+copager_lex_regex = { path = "../lex_regex" }
+copager_parse = { path = "../parse", features = ["derive"] }
+copager_ir_void = { path = "../ir_void" }
diff --git a/crates/parse_lr_slr1/src/lib.rs b/crates/parse_lr_slr1/src/lib.rs
new file mode 100644
index 0000000..4c39188
--- /dev/null
+++ b/crates/parse_lr_slr1/src/lib.rs
@@ -0,0 +1,146 @@
+#![feature(gen_blocks)]
+
+use std::marker::PhantomData;
+
+use serde::{Serialize, Deserialize};
+
+use copager_cfg::token::{Token, TokenTag};
+use copager_cfg::rule::{Rule, RuleElem, RuleTag};
+use copager_lex::LexSource;
+use copager_parse::{ParseDriver, ParseSource, ParseEvent};
+use copager_parse_common::rule::FollowSet;
+use copager_parse_lr_common::lr0::item::LR0Item;
+use copager_parse_lr_common::lr0::LR0DFA;
+use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder};
+use copager_utils::cache::Cacheable;
+
+pub struct SLR1<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>
+{
+    table: LRTable<T, R>,
+}
+
+impl<Sl, Sp> ParseDriver<Sl, Sp> for SLR1<Sl::Tag, Sp::Tag>
+where
+    Sl: LexSource,
+    Sp: ParseSource<Sl::Tag>,
+{
+    fn try_from((_, source_p): (Sl, Sp)) -> anyhow::Result<Self> {
+        let table = SLR1Table::try_from(source_p)?;
+        Ok(SLR1 { table })
+    }
+
+    gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag>
+    where
+        Il: Iterator<Item = Token<'input, Sl::Tag>>,
+    {
+        let mut driver = LRDriver::from(&self.table);
+        while !driver.accepted() {
+            for event in driver.consume(lexer.next()).collect::<Vec<_>>() {
+                yield event;
+            }
+        }
+    }
+}
+
+impl<Sl, Sp> Cacheable<(Sl, Sp)> for SLR1<Sl::Tag, Sp::Tag>
+where
+    Sl: LexSource,
+    Sl::Tag: Serialize + for<'de> Deserialize<'de>,
+    Sp: ParseSource<Sl::Tag>,
+    Sp::Tag: Serialize + for<'de> Deserialize<'de>,
+{
+    type Cache = LRTable<Sl::Tag, Sp::Tag>;
+
+    fn new((_, source_p): (Sl, Sp)) -> anyhow::Result<Self::Cache> {
+        let table = SLR1Table::try_from(source_p)?;
+        Ok(table)
+    }
+
+    fn restore(table: Self::Cache) -> Self {
+        SLR1 { table }
+    }
+}
+
+pub struct SLR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>
+{
+    _phantom_t: PhantomData<T>,
+    _phantom_r: PhantomData<R>,
+}
+
+impl<T, R> SLR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn try_from<Sp>(source_p: Sp) -> anyhow::Result<LRTable<T, R>>
+    where
+        Sp: ParseSource<T, Tag = R>,
+    {
+        // Add a dummy top-level rule and update the RuleSet
+        let mut ruleset = source_p.into_ruleset();
+        let top_dummy = Rule::new(
+            None,
+            RuleElem::new_nonterm("__top_dummy"),
+            vec![RuleElem::new_nonterm(&ruleset.top)],
+        );
+        ruleset.update_top(top_dummy.clone());
+
+        // Build the Follow sets
+        let follow_set = FollowSet::from(&ruleset);
+
+        // Build the LR(0) automaton
+        let dfa = LR0DFA::from(&ruleset);
+
+        // Build the SLR(1) parse table
+        let mut builder = LRTableBuilder::from(&dfa);
+        for node in dfa.nodes {
+            let node = node.read().unwrap();
+
+            // If the state contains "A -> α β .", mark the Follow(A) columns as Reduce
+            for rule in node.find_all_by(is_slr1_reduce_state) {
+                let lhs = lhs_as_str(&rule.lhs);
+                for term in follow_set.get(lhs).unwrap() {
+                    match term {
+                        RuleElem::Term(term) => {
+                            builder.try_set(node.id, Some(*term), LRAction::Reduce(rule.clone()))?;
+                        }
+                        RuleElem::EOF => {
+                            builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?;
+                        }
+                        _ => {}
+                    }
+                }
+
+                // If the state contains "S -> Top .", mark the EOF column as Accept
+                if rule == &top_dummy {
+                    builder.set(node.id, None, LRAction::Accept);
+                }
+            }
+        }
+        let table = builder.build();
+
+        Ok(table)
+    }
+}
+
+fn is_slr1_reduce_state<T, R>(item: &&LR0Item<T, R>) -> bool
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    item.check_next_elem().is_none()
+}
+
+fn lhs_as_str<T: TokenTag>(lhs: &RuleElem<T>) -> &str {
+    if let RuleElem::NonTerm(nt) = lhs {
+        nt.as_str()
+    } else {
+        unreachable!()
+    }
+}
diff --git a/crates/parse_lr_slr1/tests/simple.rs b/crates/parse_lr_slr1/tests/simple.rs
new file mode 100644
index 0000000..7211d75
--- /dev/null
+++ b/crates/parse_lr_slr1/tests/simple.rs
@@ -0,0 +1,100 @@
+use copager_core::{Grammar, Processor};
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleTag, Rule, RuleElem};
+use copager_lex::LexSource;
+use copager_lex_regex::RegexLexer;
+use copager_parse::ParseSource;
+use copager_parse_lr_slr1::SLR1;
+use copager_ir_void::Void;
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+enum TestToken {
+    #[default]
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\*")]
+    Mul,
+    #[token(text = r"/")]
+    Div,
+    #[token(text = r"\(")]
+    BracketL,
+    #[token(text = r"\)")]
+    BracketR,
+    #[token(text = r"[1-9][0-9]*")]
+    Num,
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+enum TestRule {
+    #[default]
+    #[rule("<expr> ::= <expr> Plus <term>")]
+    #[rule("<expr> ::= <expr> Minus <term>")]
+    #[rule("<expr> ::= <term>")]
+    Expr,
+    #[rule("<term> ::= <term> Mul <num>")]
+    #[rule("<term> ::= <term> Div <num>")]
+    #[rule("<term> ::= <num>")]
+    Term,
+    #[rule("<num> ::= BracketL <expr> BracketR")]
+    #[rule("<num> ::= Num")]
+    Num,
+}
+
+type TestGrammar = Grammar<TestToken, TestRule>;
+type TestLexer = RegexLexer<TestToken>;
+type TestParser = SLR1<TestToken, TestRule>;
+type TestProcessor = Processor<TestGrammar, TestLexer, TestParser>;
+
+#[test]
+fn simple_success() {
+    const OK_INPUTS: [&str; 10] = [
+        "10",
+        "10 + 20",
+        "10 - 20",
+        "10 * 20",
+        "10 / 20",
+        "10 + 20 * 30 - 40",
+        "(10)",
+        "((((10))))",
+        "10 * (20 - 30)",
+        "((10 + 20) * (30 / 40)) - 50",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &OK_INPUTS {
+        println!("input: {}", input);
+        processor.process::<Void>(input).unwrap();
+    }
+}
+
+#[test]
+fn simple_failure() {
+    const ERR_INPUTS: [&str; 7] = [
+        "()",
+        "(10 -",
+        "10 +",
+        "*",
+        "10 20 + 30",
+        "10 + 20 * 30 / 40 (",
+        "(((10))",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &ERR_INPUTS {
+        assert!(processor.process::<Void>(input).is_err(), "input: {}", input);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 5b802dc..3bddbe7 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -18,8 +18,10 @@ pub mod lex {
 pub mod parse {
     pub use copager_parse::*;
 
+    #[cfg(feature = "lr0")]
+    pub use copager_parse_lr_lr0::*;
     #[cfg(feature = "lr1")]
-    pub use copager_parse_lr1::*;
+    pub use copager_parse_lr_lr1::*;
 }
 
 pub mod ir {
@@ -34,3 +36,9 @@ pub mod prelude {
     pub use copager_cfg::rule::{RuleTag, Rule, RuleElem};
     pub use copager_cfg::token::TokenTag;
 }
+
+#[cfg(feature = "dev")]
+pub mod dev {
+    pub use copager_parse_common::*;
+    pub use copager_parse_lr_common as lr;
+}
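After this change the `lr0`, `lr1`, and `slr1` features map one-to-one onto parser types, so downstream code can switch parsing algorithms by changing a single type alias. A minimal usage sketch, modeled on the tests added in this diff; the `Processor` type-parameter order and the builder methods are assumptions inferred from those tests, and `TestToken`/`TestRule` stand in for the derive-based definitions shown there:

```rust
// Hypothetical downstream crate; enable e.g. features = ["regexlex", "slr1", "void"].
use copager_core::{Grammar, Processor};
use copager_lex_regex::RegexLexer;
use copager_parse_lr_slr1::SLR1;
use copager_ir_void::Void;

type MyGrammar = Grammar<TestToken, TestRule>;
type MyLexer = RegexLexer<TestToken>;
// Swap SLR1 for LR0 or LR1 here to change the parsing algorithm.
type MyParser = SLR1<TestToken, TestRule>;
type MyProcessor = Processor<MyGrammar, MyLexer, MyParser>;

fn parse(input: &str) -> anyhow::Result<()> {
    // Build the lexer and the SLR(1) table once, then reuse the processor.
    let processor = MyProcessor::new()
        .build_lexer()?
        .build_parser()?;
    // Void discards the IR; other `ir` features (e.g. sexp) keep a parse tree.
    processor.process::<Void>(input)?;
    Ok(())
}
```

Because all three parser crates share `LRTable` from `copager_parse_lr_common` and implement `Cacheable`, a prebuilt table can also be serialized and restored instead of being rebuilt on every start.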