From 94d7be7df25ebe8db3bdf50fd8d6aeef72fb580d Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sat, 9 Nov 2024 17:52:36 +0900
Subject: [PATCH 01/48] [add] FirstSet struct (rough)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock                               |  13 ++-
 Cargo.toml                               |   1 +
 crates/cfg/src/rule.rs                   |   6 +-
 crates/parse_lr_common/Cargo.toml        |  15 +++
 crates/parse_lr_common/src/lib.rs        |   3 +
 crates/parse_lr_common/src/rule.rs       |   1 +
 crates/parse_lr_common/src/rule/first.rs | 142 +++++++++++++++++++++++
 7 files changed, 177 insertions(+), 4 deletions(-)
 create mode 100644 crates/parse_lr_common/Cargo.toml
 create mode 100644 crates/parse_lr_common/src/lib.rs
 create mode 100644 crates/parse_lr_common/src/rule.rs
 create mode 100644 crates/parse_lr_common/src/rule/first.rs

diff --git a/Cargo.lock b/Cargo.lock
index dd06f09..0479efb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,6 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
 
 [[package]]
 name = "aho-corasick"
@@ -198,6 +198,17 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "copager_parse_lr_common"
+version = "0.2.0"
+dependencies = [
+ "anyhow",
+ "copager_cfg",
+ "copager_lex",
+ "copager_parse",
+ "thiserror",
+]
+
 [[package]]
 name = "copager_utils"
 version = "0.1.1"
diff --git a/Cargo.toml b/Cargo.toml
index 928bba1..c155b5b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -57,6 +57,7 @@ members = [
     "./crates/lex_regex",
     "./crates/parse",
     "./crates/parse_derive",
+    "./crates/parse_lr_common",
     "./crates/parse_lr1",
     "./crates/ir",
     "./crates/ir_void",
diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs
index 2f1bd24..b6a9a44 100644
--- a/crates/cfg/src/rule.rs
+++ b/crates/cfg/src/rule.rs
@@ -1,4 +1,4 @@
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::fmt::Debug;
 use std::hash::Hash;
 
@@ -103,11 +103,11 @@ impl<T: TokenTag> FromIterator<Rule<T>> for RuleSet<T> {
     }
 }
 
 impl<T: TokenTag> RuleSet<T> {
-    pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem<T>> {
+    pub fn nonterms<'a>(&'a self) -> HashSet<&'a RuleElem<T>> {
         self.rules.iter().flat_map(|rule| rule.nonterms()).collect()
     }
 
-    pub fn terms<'a>(&'a self) -> Vec<&'a RuleElem<T>> {
+    pub fn terms<'a>(&'a self) -> HashSet<&'a RuleElem<T>> {
         self.rules.iter().flat_map(|rule| rule.terms()).collect()
     }
diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml
new file mode 100644
index 0000000..cbf10b2
--- /dev/null
+++ b/crates/parse_lr_common/Cargo.toml
@@ -0,0 +1,15 @@
+cargo-features = ["edition2024"]
+
+[package]
+name = "copager_parse_lr_common"
+version = "0.2.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+copager_cfg = { path = "../cfg" }
+
+[dev-dependencies]
+copager_lex = { path = "../lex", features = ["derive"] }
+copager_parse = { path = "../parse", features = ["derive"] }
diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs
new file mode 100644
index 0000000..20d9298
--- /dev/null
+++ b/crates/parse_lr_common/src/lib.rs
@@ -0,0 +1,3 @@
+#![feature(gen_blocks)]
+
+pub mod rule;
diff --git a/crates/parse_lr_common/src/rule.rs b/crates/parse_lr_common/src/rule.rs
new file mode 100644
index 0000000..4df7547
--- /dev/null
+++ b/crates/parse_lr_common/src/rule.rs
@@ -0,0 +1 @@
+mod first;
diff --git a/crates/parse_lr_common/src/rule/first.rs b/crates/parse_lr_common/src/rule/first.rs
new file mode 100644
index 0000000..2af4272
--- /dev/null
+++ b/crates/parse_lr_common/src/rule/first.rs
@@ -0,0 +1,142 @@
+use std::collections::HashMap;
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleElem, RuleSet};
+
+pub struct FirstSet<'a, T: TokenTag> {
+    map: HashMap<String, Vec<&'a T>>,
+    ruleset: &'a RuleSet<T>,
+}
+
+impl<'a, T: TokenTag> TryFrom<&'a RuleSet<T>> for FirstSet<'a, T> {
+    type Error = anyhow::Error;
+
+    fn try_from(ruleset: &'a RuleSet<T>) -> anyhow::Result<Self> {
+        let mut map = HashMap::new();
+        for nonterm in ruleset.nonterms() {
+            if let RuleElem::NonTerm(nonterm) = nonterm {
+                let init_terms = rhs_terms(ruleset, nonterm).collect();
+                map.insert(nonterm.clone(), init_terms);
+            }
+        }
+
+        let mut first_set = FirstSet { map, ruleset };
+        first_set.expand()?;
+
+        Ok(first_set)
+    }
+}
+
+impl<'a, T: TokenTag> FirstSet<'a, T> {
+    fn expand(&mut self) -> anyhow::Result<bool> {
+        let nonterms = &self.ruleset.nonterms();
+        let nonterms = nonterms
+            .iter()
+            .map(|relem| match relem {
+                RuleElem::NonTerm(nonterm) => nonterm,
+                _ => unreachable!(),
+            })
+            .collect::<Vec<_>>();
+
+        let mut modified = false;
+        for &nonterm in &nonterms {
+            for rhs_nonterm in rhs_nonterms(self.ruleset, nonterm) {
+                let cand_terms = self.map.get(rhs_nonterm).unwrap().clone();
+                let lhs_terms = self.map.get_mut(nonterm).unwrap();
+                for term in cand_terms {
+                    if !lhs_terms.contains(&term) {
+                        lhs_terms.push(term);
+                        modified = true;
+                    }
+                }
+            }
+        }
+
+        Ok(modified)
+    }
+}
+
+impl<'a, T: TokenTag> FirstSet<'a, T> {
+    pub fn get(&self, nonterm: &str) -> Option<&[&T]> {
+        self.map.get(nonterm).map(|terms| terms.as_slice())
+    }
+}
+
+fn cmp_nonterm<T: TokenTag>(relem: &RuleElem<T>, lhs: &str) -> bool {
+    match relem {
+        RuleElem::NonTerm(nonterm) => nonterm == lhs,
+        _ => false,
+    }
+}
+
+fn rhs_terms<'a, T>(ruleset: &'a RuleSet<T>, nonterm: &str) -> impl Iterator<Item = &'a T>
+where
+    T: TokenTag,
+{
+    ruleset.rules
+        .iter()
+        .filter(move |rule| cmp_nonterm(&rule.lhs, nonterm))
+        .flat_map(|rule| &rule.rhs)
+        .flat_map(|relem| match relem {
+            RuleElem::Term(term) => Some(term),
+            _ => None,
+        })
+}
+
+fn rhs_nonterms<'a, T>(ruleset: &'a RuleSet<T>, nonterm: &str) -> impl Iterator<Item = &'a str>
+where
+    T: TokenTag,
+{
+    ruleset.rules
+        .iter()
+        .filter(move |rule| cmp_nonterm(&rule.lhs, nonterm))
+        .flat_map(|rule| &rule.rhs)
+        .flat_map(|relem| match relem {
+            RuleElem::NonTerm(nonterm) => Some(nonterm.as_str()),
+            _ => None,
+        })
+}
+
+#[cfg(test)]
+mod test {
+    use copager_cfg::token::TokenTag;
+    use copager_cfg::rule::{Rule, RuleTag, RuleElem};
+    use copager_lex::LexSource;
+    use copager_parse::ParseSource;
+
+    use super::FirstSet;
+
+    #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+    enum TestToken {
+        #[token(r"a")]
+        A,
+        #[token(r"b")]
+        B,
+    }
+
+    #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+    enum TestRule {
+        #[default]
+        #[rule("<S> ::= <A> <B>")]
+        S,
+        #[rule("<A> ::= A")]
+        A,
+        #[rule("<B> ::= A B")]
+        B,
+    }
+
+    #[test]
+    fn first_set() {
+        let ruleset = TestRule::default().into_ruleset();
+        let first_set = FirstSet::try_from(&ruleset).unwrap();
+
+        let expected = vec![&TestToken::A, &TestToken::B];
+        assert_eq!(first_set.get("S"), Some(expected.as_slice()));
+
+        let expected = vec![&TestToken::A];
+        assert_eq!(first_set.get("A"), Some(expected.as_slice()));
+
+        let expected = vec![&TestToken::A, &TestToken::B];
+        assert_eq!(first_set.get("B"), Some(expected.as_slice()));
+    }
+}
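The FirstSet above repeatedly propagates terminals into FIRST sets until nothing changes. A minimal self-contained sketch of that fixpoint idea, independent of the copager RuleSet/RuleElem types (assumed toy grammar, epsilon-free, and only the leading right-hand-side symbol is inspected, as the later patches converge on):

    use std::collections::{HashMap, HashSet};

    // FIRST-set fixpoint sketch: uppercase map keys are nonterminals,
    // everything else is treated as a terminal.
    fn first_sets(rules: &[(&str, Vec<&str>)]) -> HashMap<String, HashSet<String>> {
        // Every left-hand side starts with an empty FIRST set.
        let mut first: HashMap<String, HashSet<String>> = rules
            .iter()
            .map(|(lhs, _)| (lhs.to_string(), HashSet::new()))
            .collect();
        loop {
            let mut changed = false;
            for (lhs, rhs) in rules {
                if let Some(&sym) = rhs.first() {
                    // A nonterminal contributes its whole FIRST set,
                    // a terminal contributes itself.
                    let add: HashSet<String> = if first.contains_key(sym) {
                        first[sym].clone()
                    } else {
                        HashSet::from([sym.to_string()])
                    };
                    let set = first.get_mut(*lhs).unwrap();
                    let before = set.len();
                    set.extend(add);
                    changed |= set.len() != before;
                }
            }
            if !changed { break; } // fixpoint reached
        }
        first
    }

    fn main() {
        // S -> A b ; A -> a  =>  FIRST(S) = FIRST(A) = {a}
        let rules = vec![("S", vec!["A", "b"]), ("A", vec!["a"])];
        println!("{:?}", first_sets(&rules));
    }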
From f82dd16f3e42357a76952ec8c333d8e1f1154bd2 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sat, 9 Nov 2024 18:51:23 +0900
Subject: [PATCH 02/48] [fix] Run expand a sufficient number of times
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/cfg/src/rule.rs                   | 199 -----------------------
 crates/parse_lr_common/src/rule.rs       |   3 +
 crates/parse_lr_common/src/rule/first.rs |  18 +-
 3 files changed, 10 insertions(+), 210 deletions(-)

diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs
index b6a9a44..494db69 100644
--- a/crates/cfg/src/rule.rs
+++ b/crates/cfg/src/rule.rs
@@ -199,202 +199,3 @@ impl<T: TokenTag> RuleSet<T> {
         nulls_set
     }
 }
-
-// #[cfg(test)]
-// mod test {
-//     use std::collections::HashMap;
-
-//     use crate::token::TokenTag;
-//     use crate::RuleKind;
-
-//     use super::{Rule, RuleElem};
-
-//     #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)]
-//     enum TestToken {
-//         Num,
-//         Plus,
-//         Minus,
-//         Mul,
-//         Div,
-//         BracketA,
-//         BracketB,
-//     }
-
-//     impl TokenKind<'_> for TestToken {
-//         fn as_str(&self) -> &'static str {
-//             match self {
-//                 TestToken::Num => r"^[1-9][0-9]*",
-//                 TestToken::Plus => r"^\+",
-//                 TestToken::Minus => r"^-",
-//                 TestToken::Mul => r"^\*",
-//                 TestToken::Div => r"^/",
-//                 TestToken::BracketA => r"^\(",
-//                 TestToken::BracketB => r"^\)",
-//             }
-//         }
-
-//         fn ignore_str() -> &'static str {
-//             r"^[ \t\n]+"
-//         }
-
-//         fn into_iter() -> impl Iterator<Item = TestToken> {
-//             vec![
-//                 TestToken::Num,
-//                 TestToken::Plus,
-//                 TestToken::Minus,
-//                 TestToken::Mul,
-//                 TestToken::Div,
-//                 TestToken::BracketA,
-//                 TestToken::BracketB,
-//             ]
-//             .into_iter()
-//         }
-//     }
-
-//     #[derive(Debug, Clone, Hash, PartialEq, Eq)]
-//     enum TestRule {
-//         ExprPlus,
-//         ExprMinus,
-//         Expr2Term,
-//         TermMul,
-//         TermDiv,
-//         Term2Fact,
-//         Fact2Expr,
-//         Fact2Num,
-//     }
-
-//     impl<'a> RuleKind<'a> for TestRule {
-//         type TokenKind = TestToken;
-
-//         fn into_iter() -> impl Iterator<Item = TestRule> {
-//             Box::new(
-//                 vec![
-//                     TestRule::ExprPlus,
-//                     TestRule::ExprMinus,
-//                     TestRule::Expr2Term,
-//                     TestRule::TermMul,
-//                     TestRule::TermDiv,
-//                     TestRule::Term2Fact,
-//                     TestRule::Fact2Expr,
-//                     TestRule::Fact2Num,
-//                 ]
-//                 .into_iter(),
-//             )
-//         }
-
-//         fn into_rules(&self) -> Vec<Rule<TestToken>> {
-//             let expr_plus = Rule::from((
-//                 RuleElem::new_nonterm("expr"),
-//                 vec![
-//                     RuleElem::new_nonterm("expr"),
-//                     RuleElem::new_term(TestToken::Plus),
-//                     RuleElem::new_nonterm("term"),
-//                 ],
-//             ));
-
-//             let expr_minus = Rule::from((
-//                 RuleElem::new_nonterm("expr"),
-//                 vec![
-//                     RuleElem::new_nonterm("expr"),
-//                     RuleElem::new_term(TestToken::Minus),
-//                     RuleElem::new_nonterm("term"),
-//                 ],
-//             ));
-
-//             let expr_2_term = Rule::<TestToken>::from((
-//                 RuleElem::new_nonterm("expr"),
-//                 vec![RuleElem::new_nonterm("term")],
-//             ));
-
-//             let term_mul = Rule::from((
-//                 RuleElem::new_nonterm("term"),
-//                 vec![
-//                     RuleElem::new_nonterm("term"),
-//                     RuleElem::new_term(TestToken::Mul),
-//                     RuleElem::new_nonterm("fact"),
-//                 ],
-//             ));
-
-//             let term_div = Rule::from((
-//                 RuleElem::new_nonterm("term"),
-//                 vec![
-//                     RuleElem::new_nonterm("term"),
-//                     RuleElem::new_term(TestToken::Div),
-//                     RuleElem::new_nonterm("fact"),
-//                 ],
-//             ));
-
-//             let term_2_fact = Rule::<TestToken>::from((
-//                 RuleElem::new_nonterm("term"),
-//                 vec![RuleElem::new_nonterm("fact")],
-//             ));
-
-//             let fact_2_expr = Rule::from((
-//                 RuleElem::new_nonterm("fact"),
-//                 vec![
-//                     RuleElem::new_term(TestToken::BracketA),
-//                     RuleElem::new_nonterm("expr"),
-//                     RuleElem::new_term(TestToken::BracketB),
-//                 ],
-//             ));
-
-//             let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![]));
-
-//             match self {
-//                 TestRule::ExprPlus => vec![expr_plus],
-//                 TestRule::ExprMinus => vec![expr_minus],
-//                 TestRule::Expr2Term => vec![expr_2_term],
-//                 TestRule::TermMul => vec![term_mul],
-//                 TestRule::TermDiv => vec![term_div],
-//                 TestRule::Term2Fact => vec![term_2_fact],
-//                 TestRule::Fact2Expr => vec![fact_2_expr],
-//                 TestRule::Fact2Num => vec![fact_2_num],
-//             }
-//         }
-//     }
-
-//     fn check<T: Into<String>>(
-//         first_set: &HashMap<&RuleElem<TestToken>, Vec<&RuleElem<TestToken>>>,
-//         nonterm: T,
-//         exp_terms: Vec<TestToken>,
-//     ) {
-//         let nonterms = RuleElem::<TestToken>::new_nonterm(nonterm);
-//         let exp_terms: Vec<RuleElem<TestToken>> = exp_terms
-//             .into_iter()
-//             .map(|term| RuleElem::new_term(term))
-//             .collect();
-//         assert!(first_set.get(&nonterms).unwrap().len() == exp_terms.len());
-
-//         let result = first_set
-//             .get(&nonterms)
-//             .unwrap()
-//             .into_iter()
-//             .zip(exp_terms.into_iter())
-//             .any(|(a, b)| a == &&b);
-//         assert!(result);
-//     }
-
-//     #[test]
-//     fn first_set() {
-//         let ruleset = <TestRule as RuleKind>::into_ruleset();
-//         let first_set = ruleset.first_set();
-
-//         check(
-//             &first_set,
-//             "expr",
-//             vec![
-//                 TestToken::Plus,
-//                 TestToken::Minus,
-//                 TestToken::Mul,
-//                 TestToken::Div,
-//                 TestToken::BracketA,
-//             ],
-//         );
-//         check(
-//             &first_set,
-//             "term",
-//             vec![TestToken::Mul, TestToken::Div, TestToken::BracketA],
-//         );
-//         check(&first_set, "fact", vec![TestToken::BracketA]);
-//     }
-// }
diff --git a/crates/parse_lr_common/src/rule.rs b/crates/parse_lr_common/src/rule.rs
index 4df7547..a309520 100644
--- a/crates/parse_lr_common/src/rule.rs
+++ b/crates/parse_lr_common/src/rule.rs
@@ -1 +1,4 @@
 mod first;
+mod follow;
+
+pub use first::FirstSet;
diff --git a/crates/parse_lr_common/src/rule/first.rs b/crates/parse_lr_common/src/rule/first.rs
index 2af4272..95668cf 100644
--- a/crates/parse_lr_common/src/rule/first.rs
+++ b/crates/parse_lr_common/src/rule/first.rs
@@ -8,10 +8,8 @@ pub struct FirstSet<'a, T: TokenTag> {
     ruleset: &'a RuleSet<T>,
 }
 
-impl<'a, T: TokenTag> TryFrom<&'a RuleSet<T>> for FirstSet<'a, T> {
-    type Error = anyhow::Error;
-
-    fn try_from(ruleset: &'a RuleSet<T>) -> anyhow::Result<Self> {
+impl<'a, T: TokenTag> From<&'a RuleSet<T>> for FirstSet<'a, T> {
+    fn from(ruleset: &'a RuleSet<T>) -> Self {
         let mut map = HashMap::new();
         for nonterm in ruleset.nonterms() {
             if let RuleElem::NonTerm(nonterm) = nonterm {
@@ -21,14 +19,13 @@ impl<'a, T: TokenTag> TryFrom<&'a RuleSet<T>> for FirstSet<'a, T> {
         }
 
         let mut first_set = FirstSet { map, ruleset };
-        first_set.expand()?;
-
-        Ok(first_set)
+        while first_set.expand() {}
+        first_set
     }
 }
 
 impl<'a, T: TokenTag> FirstSet<'a, T> {
-    fn expand(&mut self) -> anyhow::Result<bool> {
+    fn expand(&mut self) -> bool {
         let nonterms = &self.ruleset.nonterms();
         let nonterms = nonterms
             .iter()
@@ -51,8 +48,7 @@ impl<'a, T: TokenTag> FirstSet<'a, T> {
             }
         }
-
-        Ok(modified)
+        modified
     }
 }
 
@@ -128,7 +124,7 @@ mod test {
     #[test]
     fn first_set() {
         let ruleset = TestRule::default().into_ruleset();
-        let first_set = FirstSet::try_from(&ruleset).unwrap();
+        let first_set = FirstSet::from(&ruleset);
 
         let expected = vec![&TestToken::A, &TestToken::B];
         assert_eq!(first_set.get("S"), Some(expected.as_slice()));
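The fix above matters because a single expand() pass only moves FIRST information across one rule. A self-contained sketch (hypothetical chain grammar, plain strings) that counts how many passes the naive loop needs before converging:

    use std::collections::{HashMap, HashSet};

    // Chain grammar S -> A, A -> B, B -> b: each pass moves information
    // across one rule, which is why the patch wraps expand() in
    // `while first_set.expand() {}`.
    fn main() {
        let rules = [("S", "A"), ("A", "B"), ("B", "b")];
        let mut first: HashMap<&str, HashSet<&str>> =
            rules.iter().map(|(lhs, _)| (*lhs, HashSet::new())).collect();
        let mut passes = 0;
        loop {
            passes += 1;
            let mut changed = false;
            for (lhs, rhs) in rules {
                let add: HashSet<&str> = match first.get(rhs) {
                    Some(set) => set.clone(),     // RHS is a nonterminal
                    None => HashSet::from([rhs]), // RHS is a terminal
                };
                let set = first.get_mut(lhs).unwrap();
                let before = set.len();
                set.extend(add);
                changed |= set.len() != before;
            }
            if !changed { break; }
        }
        // Three passes to propagate b up the chain, one more to confirm.
        println!("converged after {passes} passes"); // prints 4
    }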
From 7686af7211a809a493ba29561115ad00a486bb0c Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sat, 9 Nov 2024 20:05:12 +0900
Subject: [PATCH 03/48] [fix] Fix the FirstSet implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/cfg/src/rule.rs                   |  14 +-
 crates/parse_derive/src/impl/rule.rs     |   5 +
 crates/parse_lr_common/src/rule/first.rs | 158 ++++++++++++++---------
 3 files changed, 104 insertions(+), 73 deletions(-)

diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs
index 494db69..f513644 100644
--- a/crates/cfg/src/rule.rs
+++ b/crates/cfg/src/rule.rs
@@ -44,28 +44,20 @@ impl<T: TokenTag> Rule<T> {
     }
 }
 
-#[derive(Debug, Clone, Eq)]
+#[derive(Debug, Clone, Hash, Eq)]
 pub enum RuleElem<T: TokenTag> {
     NonTerm(String),
     Term(T),
+    Epsilon,
     EOF,
 }
 
-impl<T: TokenTag> Hash for RuleElem<T> {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        match self {
-            RuleElem::NonTerm(s) => s.hash(state),
-            RuleElem::Term(t) => t.hash(state),
-            RuleElem::EOF => 0.hash(state),
-        }
-    }
-}
-
 impl<T: TokenTag> PartialEq for RuleElem<T> {
     fn eq(&self, other: &Self) -> bool {
         match (self, other) {
             (RuleElem::NonTerm(s1), RuleElem::NonTerm(s2)) => s1 == s2,
             (RuleElem::Term(t1), RuleElem::Term(t2)) => t1 == t2,
+            (RuleElem::Epsilon, RuleElem::Epsilon) => true,
             (RuleElem::EOF, RuleElem::EOF) => true,
             _ => false,
         }
diff --git a/crates/parse_derive/src/impl/rule.rs b/crates/parse_derive/src/impl/rule.rs
index 5031024..276539e 100644
--- a/crates/parse_derive/src/impl/rule.rs
+++ b/crates/parse_derive/src/impl/rule.rs
@@ -112,6 +112,11 @@ fn parse_rule(token: &TokenStream, input: &str) -> TokenStream {
             }
         })
         .collect::<Vec<_>>();
+    let rhs = if rhs.len() == 0 {
+        vec![quote! { RuleElem::Epsilon }]
+    } else {
+        rhs
+    };
 
     quote! { Rule::from((#lhs, vec![ #( #rhs, )* ])) }
 }
diff --git a/crates/parse_lr_common/src/rule/first.rs b/crates/parse_lr_common/src/rule/first.rs
index 95668cf..c04068a 100644
--- a/crates/parse_lr_common/src/rule/first.rs
+++ b/crates/parse_lr_common/src/rule/first.rs
@@ -1,96 +1,104 @@
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 
 use copager_cfg::token::TokenTag;
 use copager_cfg::rule::{RuleElem, RuleSet};
 
 pub struct FirstSet<'a, T: TokenTag> {
-    map: HashMap<String, Vec<&'a T>>,
-    ruleset: &'a RuleSet<T>,
+    map: HashMap<String, Vec<&'a RuleElem<T>>>,
+    _ruleset: &'a RuleSet<T>,
 }
 
 impl<'a, T: TokenTag> From<&'a RuleSet<T>> for FirstSet<'a, T> {
+    fn from(ruleset: &'a RuleSet<T>) -> Self {
+        let build = FirstSetBuilder::from(ruleset).expand();
+        let map = build.map
+            .into_iter()
+            .map(|(k, v)| (k, v.into_iter().collect()))
+            .collect();
+
+        FirstSet {
+            map,
+            _ruleset: ruleset,
+        }
+    }
+}
+
+impl<'a, T: TokenTag> FirstSet<'a, T> {
+    pub fn get(&self, nonterm: &str) -> Option<&[&'a RuleElem<T>]> {
+        self.map.get(nonterm).map(|terms| terms.as_slice())
+    }
+}
+
+struct FirstSetBuilder<'a, T: TokenTag> {
+    map: HashMap<String, HashSet<&'a RuleElem<T>>>,
+    ruleset: &'a RuleSet<T>,
+    nonterms: Vec<&'a str>,
+}
+
+impl<'a, T: TokenTag> From<&'a RuleSet<T>> for FirstSetBuilder<'a, T> {
     fn from(ruleset: &'a RuleSet<T>) -> Self {
         let mut map = HashMap::new();
         for nonterm in ruleset.nonterms() {
             if let RuleElem::NonTerm(nonterm) = nonterm {
-                let init_terms = rhs_terms(ruleset, nonterm).collect();
-                map.insert(nonterm.clone(), init_terms);
+                map.insert(nonterm.clone(), HashSet::new());
             }
         }
 
-        let mut first_set = FirstSet { map, ruleset };
-        while first_set.expand() {}
-        first_set
-    }
-}
-
-impl<'a, T: TokenTag> FirstSet<'a, T> {
-    fn expand(&mut self) -> bool {
-        let nonterms = &self.ruleset.nonterms();
+        let nonterms = ruleset.nonterms();
         let nonterms = nonterms
             .iter()
             .map(|relem| match relem {
-                RuleElem::NonTerm(nonterm) => nonterm,
+                RuleElem::NonTerm(nonterm) => nonterm.as_str(),
                 _ => unreachable!(),
             })
             .collect::<Vec<_>>();
 
-        let mut modified = false;
-        for &nonterm in &nonterms {
-            for rhs_nonterm in rhs_nonterms(self.ruleset, nonterm) {
-                let cand_terms = self.map.get(rhs_nonterm).unwrap().clone();
-                let lhs_terms = self.map.get_mut(nonterm).unwrap();
-                for term in cand_terms {
-                    if !lhs_terms.contains(&term) {
-                        lhs_terms.push(term);
-                        modified = true;
-                    }
-                }
-            }
+        FirstSetBuilder {
+            map,
+            ruleset,
+            nonterms,
         }
-        modified
     }
 }
 
-impl<'a, T: TokenTag> FirstSet<'a, T> {
-    pub fn get(&self, nonterm: &str) -> Option<&[&T]> {
-        self.map.get(nonterm).map(|terms| terms.as_slice())
+impl<'a, T: TokenTag> FirstSetBuilder<'a, T> {
+    fn expand(mut self) -> Self {
+        while self.expand_child() {}
+        self
     }
-}
 
-fn cmp_nonterm<T: TokenTag>(relem: &RuleElem<T>, lhs: &str) -> bool {
-    match relem {
-        RuleElem::NonTerm(nonterm) => nonterm == lhs,
-        _ => false,
+    fn expand_child(&mut self) -> bool {
+        let mut modified = false;
+        for &nonterm in &self.nonterms {
+            let old_len = self.map.get(nonterm).unwrap().len();
+            for first_symbol in rhs_first_symbol(self.ruleset, nonterm) {
+                match first_symbol {
+                    RuleElem::NonTerm(first_nonterm) => {
+                        let cand_terms = self.map.get(first_nonterm).unwrap().clone();
+                        self.map.get_mut(nonterm).unwrap().extend(cand_terms);
+                    },
+                    _ => { self.map.get_mut(nonterm).unwrap().insert(first_symbol); }
+                }
+            }
+            modified |= old_len != self.map.get(nonterm).unwrap().len();
+        }
+        modified
     }
 }
 
-fn rhs_terms<'a, T>(ruleset: &'a RuleSet<T>, nonterm: &str) -> impl Iterator<Item = &'a T>
+fn rhs_first_symbol<'a, T>(ruleset: &'a RuleSet<T>, nonterm: &str) -> impl Iterator<Item = &'a RuleElem<T>>
 where
     T: TokenTag,
 {
+    let cmp_nonterm = |relem: &RuleElem<T>, lhs: &str| match relem {
+        RuleElem::NonTerm(nonterm) => nonterm == lhs,
+        _ => false,
+    };
+
     ruleset.rules
         .iter()
         .filter(move |rule| cmp_nonterm(&rule.lhs, nonterm))
-        .flat_map(|rule| &rule.rhs)
-        .flat_map(|relem| match relem {
-            RuleElem::Term(term) => Some(term),
-            _ => None,
-        })
-}
-
-fn rhs_nonterms<'a, T>(ruleset: &'a RuleSet<T>, nonterm: &str) -> impl Iterator<Item = &'a str>
-where
-    T: TokenTag,
-{
-    ruleset.rules
-        .iter()
-        .filter(move |rule| cmp_nonterm(&rule.lhs, nonterm))
-        .flat_map(|rule| &rule.rhs)
-        .flat_map(|relem| match relem {
-            RuleElem::NonTerm(nonterm) => Some(nonterm.as_str()),
-            _ => None,
-        })
+        .flat_map(|rule| rule.rhs.first())
 }
@@ -117,22 +125,48 @@ mod test {
         S,
         #[rule("<A> ::= A")]
         A,
-        #[rule("<B> ::= A B")]
+        #[rule("<B> ::= <S> B")]
         B,
+        #[rule("<C> ::= ")]
+        C,
+    }
+
+    fn eq_symbols<T>(lhs: &[&RuleElem<T>], rhs: &[RuleElem<T>]) -> bool
+    where
+        T: TokenTag,
+    {
+        if lhs.len() != rhs.len() {
+            println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+            return false;
+        }
+        for lelem in lhs {
+            if !rhs.contains(lelem) {
+                println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+                return false;
+            }
+        }
+        return true;
     }
 
     #[test]
     fn first_set() {
term { + ($expr:ident) => { RuleElem::new_term(TestToken::$expr) }; + } + let ruleset = TestRule::default().into_ruleset(); let first_set = FirstSet::from(&ruleset); - let expected = vec![&TestToken::A, &TestToken::B]; - assert_eq!(first_set.get("S"), Some(expected.as_slice())); + let expected = vec![term!(A)]; + assert!(eq_symbols(first_set.get("S").unwrap(), expected.as_slice())); + + let expected = vec![term!(A)]; + assert!(eq_symbols(first_set.get("A").unwrap(), expected.as_slice())); - let expected = vec![&TestToken::A]; - assert_eq!(first_set.get("A"), Some(expected.as_slice())); + let expected = vec![term!(A)]; + assert!(eq_symbols(first_set.get("B").unwrap(), expected.as_slice())); - let expected = vec![&TestToken::A, &TestToken::B]; - assert_eq!(first_set.get("B"), Some(expected.as_slice())); + let expected = vec![RuleElem::Epsilon]; + assert!(eq_symbols(first_set.get("C").unwrap(), expected.as_slice())); } } From 10c42112f7bc0c01765b5482da3fd0ee22261a9a Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 9 Nov 2024 21:05:26 +0900 Subject: [PATCH 04/48] =?UTF-8?q?[add]=20Follow=20=E6=A7=8B=E9=80=A0?= =?UTF-8?q?=E4=BD=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/rule.rs | 1 + crates/parse_lr_common/src/rule/follow.rs | 187 ++++++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 crates/parse_lr_common/src/rule/follow.rs diff --git a/crates/parse_lr_common/src/rule.rs b/crates/parse_lr_common/src/rule.rs index a309520..2d8ef97 100644 --- a/crates/parse_lr_common/src/rule.rs +++ b/crates/parse_lr_common/src/rule.rs @@ -2,3 +2,4 @@ mod first; mod follow; pub use first::FirstSet; +pub use follow::FollowSet; diff --git a/crates/parse_lr_common/src/rule/follow.rs b/crates/parse_lr_common/src/rule/follow.rs new file mode 100644 index 0000000..fbe4e4b --- /dev/null +++ b/crates/parse_lr_common/src/rule/follow.rs @@ -0,0 +1,187 @@ +use std::collections::{HashMap, HashSet}; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleElem, RuleSet}; + +use crate::rule::FirstSet; + +pub struct FollowSet<'a, T: TokenTag> { + map: HashMap>>, + _ruleset: &'a RuleSet, +} + +impl<'a, T: TokenTag> From<&'a RuleSet> for FollowSet<'a, T> { + fn from(ruleset: &'a RuleSet) -> Self { + let build = FollowSetBuilder::from(ruleset).expand(); + let map = build.map + .into_iter() + .map(|(k, v)| (k, v.into_iter().collect())) + .collect(); + + FollowSet { + map, + _ruleset: ruleset, + } + } +} + +impl<'a, T: TokenTag> FollowSet<'a, T> { + pub fn get(&self, nonterm: &str) -> Option<&[&RuleElem]> { + self.map.get(nonterm).map(|terms| terms.as_slice()) + } +} + +pub struct FollowSetBuilder<'a, T: TokenTag> { + map: HashMap>>, + ruleset: &'a RuleSet, +} + +impl<'a, T: TokenTag> From<&'a RuleSet> for FollowSetBuilder<'a, T> { + fn from(ruleset: &'a RuleSet) -> Self { + let mut map = HashMap::new(); + for nonterm in ruleset.nonterms() { + if let RuleElem::NonTerm(nonterm) = nonterm { + map.insert(nonterm.clone(), HashSet::new()); + } + } + map.get_mut(&ruleset.top).unwrap().insert(&RuleElem::EOF); + + FollowSetBuilder { + map, + ruleset, + } + } +} + +impl<'a, T: TokenTag> FollowSetBuilder<'a, T> { + fn expand(mut self) -> Self { + while self.expand_child() {} + self + } + + fn expand_child(&mut self) -> bool { + let first_set = FirstSet::from(self.ruleset); + + let mut modified = false; + for rule in &self.ruleset.rules { + let lhs = match &rule.lhs { + RuleElem::NonTerm(s) => s.as_str(), + _ => 
From 10c42112f7bc0c01765b5482da3fd0ee22261a9a Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sat, 9 Nov 2024 21:05:26 +0900
Subject: [PATCH 04/48] [add] Follow struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/parse_lr_common/src/rule.rs        |   1 +
 crates/parse_lr_common/src/rule/follow.rs | 187 ++++++++++++++++++++++
 2 files changed, 188 insertions(+)
 create mode 100644 crates/parse_lr_common/src/rule/follow.rs

diff --git a/crates/parse_lr_common/src/rule.rs b/crates/parse_lr_common/src/rule.rs
index a309520..2d8ef97 100644
--- a/crates/parse_lr_common/src/rule.rs
+++ b/crates/parse_lr_common/src/rule.rs
@@ -2,3 +2,4 @@ mod first;
 mod follow;
 
 pub use first::FirstSet;
+pub use follow::FollowSet;
diff --git a/crates/parse_lr_common/src/rule/follow.rs b/crates/parse_lr_common/src/rule/follow.rs
new file mode 100644
index 0000000..fbe4e4b
--- /dev/null
+++ b/crates/parse_lr_common/src/rule/follow.rs
@@ -0,0 +1,187 @@
+use std::collections::{HashMap, HashSet};
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleElem, RuleSet};
+
+use crate::rule::FirstSet;
+
+pub struct FollowSet<'a, T: TokenTag> {
+    map: HashMap<String, Vec<&'a RuleElem<T>>>,
+    _ruleset: &'a RuleSet<T>,
+}
+
+impl<'a, T: TokenTag> From<&'a RuleSet<T>> for FollowSet<'a, T> {
+    fn from(ruleset: &'a RuleSet<T>) -> Self {
+        let build = FollowSetBuilder::from(ruleset).expand();
+        let map = build.map
+            .into_iter()
+            .map(|(k, v)| (k, v.into_iter().collect()))
+            .collect();
+
+        FollowSet {
+            map,
+            _ruleset: ruleset,
+        }
+    }
+}
+
+impl<'a, T: TokenTag> FollowSet<'a, T> {
+    pub fn get(&self, nonterm: &str) -> Option<&[&RuleElem<T>]> {
+        self.map.get(nonterm).map(|terms| terms.as_slice())
+    }
+}
+
+pub struct FollowSetBuilder<'a, T: TokenTag> {
+    map: HashMap<String, HashSet<&'a RuleElem<T>>>,
+    ruleset: &'a RuleSet<T>,
+}
+
+impl<'a, T: TokenTag> From<&'a RuleSet<T>> for FollowSetBuilder<'a, T> {
+    fn from(ruleset: &'a RuleSet<T>) -> Self {
+        let mut map = HashMap::new();
+        for nonterm in ruleset.nonterms() {
+            if let RuleElem::NonTerm(nonterm) = nonterm {
+                map.insert(nonterm.clone(), HashSet::new());
+            }
+        }
+        map.get_mut(&ruleset.top).unwrap().insert(&RuleElem::EOF);
+
+        FollowSetBuilder {
+            map,
+            ruleset,
+        }
+    }
+}
+
+impl<'a, T: TokenTag> FollowSetBuilder<'a, T> {
+    fn expand(mut self) -> Self {
+        while self.expand_child() {}
+        self
+    }
+
+    fn expand_child(&mut self) -> bool {
+        let first_set = FirstSet::from(self.ruleset);
+
+        let mut modified = false;
+        for rule in &self.ruleset.rules {
+            let lhs = match &rule.lhs {
+                RuleElem::NonTerm(s) => s.as_str(),
+                _ => unreachable!(),
+            };
+            for rhs_idx in 0..rule.rhs.len() {
+                let target = &rule.rhs[rhs_idx];
+                let follow_symbols = &rule.rhs[rhs_idx+1..];
+                let prob_first_symbols = first_by(&first_set, follow_symbols);
+                modified |= self.append_by_first(target, &prob_first_symbols);
+                if prob_first_symbols.contains(&&RuleElem::Epsilon) {
+                    modified |= self.append_when_nullable(target, lhs);
+                }
+            }
+        }
+        modified
+    }
+
+    fn append_by_first(&mut self, target: &RuleElem<T>, first_symbol: &[&'a RuleElem<T>]) -> bool {
+        if let RuleElem::NonTerm(nonterm) = target {
+            let old_idx = self.map.get(nonterm).unwrap().len();
+            let first_symbol = first_symbol.iter().filter(|relem| **relem != &RuleElem::Epsilon);
+            self.map.get_mut(nonterm).unwrap().extend(first_symbol);
+            old_idx != self.map.get(nonterm).unwrap().len()
+        } else {
+            false
+        }
+    }
+
+    fn append_when_nullable(&mut self, target: &RuleElem<T>, lhs: &str) -> bool {
+        if let RuleElem::NonTerm(nonterm) = target {
+            let lhs_follow = self.map.get(lhs).unwrap().clone();
+            let old_idx = self.map.get(nonterm).unwrap().len();
+            self.map.get_mut(nonterm).unwrap().extend(lhs_follow);
+            old_idx != self.map.get(nonterm).unwrap().len()
+        } else {
+            false
+        }
+    }
+}
+
+fn first_by<'a, T: TokenTag>(first_set: &FirstSet<'a, T>, relems: &'a [RuleElem<T>]) -> Vec<&'a RuleElem<T>> {
+    if relems.is_empty() {
+        vec![&RuleElem::Epsilon]
+    } else {
+        match &relems[0] {
+            RuleElem::NonTerm(s) => first_set.get(s.as_str()).unwrap().to_vec(),
+            t@RuleElem::Term(_) => vec![t],
+            _ => unreachable!(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use copager_cfg::token::TokenTag;
+    use copager_cfg::rule::{Rule, RuleTag, RuleElem};
+    use copager_lex::LexSource;
+    use copager_parse::ParseSource;
+
+    use super::FollowSet;
+
+    #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+    enum TestToken {
+        #[token(r"a")]
+        A,
+        #[token(r"b")]
+        B,
+    }
+
+    #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+    enum TestRule {
+        #[default]
+        #[rule("<S> ::= <A> <B>")]
+        S,
+        #[rule("<A> ::= A")]
+        A,
+        #[rule("<B> ::= <S> B")]
+        B,
+        #[rule("<C> ::= ")]
+        C,
+    }
+
+    fn eq_symbols<T>(lhs: &[&RuleElem<T>], rhs: &[RuleElem<T>]) -> bool
+    where
+        T: TokenTag,
+    {
+        if lhs.len() != rhs.len() {
+            println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+            return false;
+        }
+        for lelem in lhs {
+            if !rhs.contains(lelem) {
+                println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+                return false;
+            }
+        }
+        return true;
+    }
+
+    #[test]
+    fn follow_set() {
+        macro_rules! term {
+            ($expr:ident) => { RuleElem::new_term(TestToken::$expr) };
+        }
+
+        let ruleset = TestRule::default().into_ruleset();
+        let follow_set = FollowSet::from(&ruleset);
+
+        let expected = vec![term!(B), RuleElem::EOF];
+        assert!(eq_symbols(follow_set.get("S").unwrap(), expected.as_slice()));
+
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(follow_set.get("A").unwrap(), expected.as_slice()));
+
+        let expected = vec![term!(B), RuleElem::EOF];
+        assert!(eq_symbols(follow_set.get("B").unwrap(), expected.as_slice()));
+
+        let expected = vec![];
+        assert!(eq_symbols(follow_set.get("C").unwrap(), expected.as_slice()));
+    }
+}
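The FollowSet above applies the two textbook FOLLOW rules: for X followed by β in some rule, FOLLOW(X) gains FIRST(β) minus ε, and if β is nullable or empty, FOLLOW(X) also gains FOLLOW(lhs). A self-contained sketch run on the same grammar as the test (S ::= A B, A ::= a, B ::= S b, C ::= ε; "$" stands in for RuleElem::EOF, and the suffix FIRST is the same first-symbol approximation the patch uses):

    use std::collections::{HashMap, HashSet};

    fn main() {
        let rules: &[(&str, &[&str])] =
            &[("S", &["A", "B"]), ("A", &["a"]), ("B", &["S", "b"]), ("C", &[])];
        // FIRST sets as computed by the previous patches.
        let first: HashMap<&str, HashSet<&str>> = HashMap::from([
            ("S", HashSet::from(["a"])),
            ("A", HashSet::from(["a"])),
            ("B", HashSet::from(["a"])),
            ("C", HashSet::from(["ε"])),
        ]);
        let mut follow: HashMap<&str, HashSet<&str>> =
            rules.iter().map(|(l, _)| (*l, HashSet::new())).collect();
        follow.get_mut("S").unwrap().insert("$"); // EOF seeds the start symbol
        let mut changed = true;
        while changed {
            changed = false;
            for (lhs, rhs) in rules {
                for (i, sym) in rhs.iter().enumerate() {
                    if !follow.contains_key(sym) { continue; } // skip terminals
                    // FIRST of the suffix after sym (first-symbol approximation).
                    let suffix_first: HashSet<&str> = match rhs.get(i + 1) {
                        Some(next) if first.contains_key(next) => first[next].clone(),
                        Some(next) => HashSet::from([*next]),
                        None => HashSet::from(["ε"]),
                    };
                    let mut add: HashSet<&str> =
                        suffix_first.iter().copied().filter(|t| *t != "ε").collect();
                    if suffix_first.contains("ε") {
                        add.extend(follow[lhs].clone()); // nullable suffix rule
                    }
                    let set = follow.get_mut(sym).unwrap();
                    let before = set.len();
                    set.extend(add);
                    changed |= set.len() != before;
                }
            }
        }
        // Matches the test: S -> {b,$}, A -> {a}, B -> {b,$}, C -> {}
        println!("{follow:?}");
    }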
term { + ($expr:ident) => { RuleElem::new_term(TestToken::$expr) }; + } + + let ruleset = TestRule::default().into_ruleset(); + let follow_set = FollowSet::from(&ruleset); + + let expected = vec![term!(B), RuleElem::EOF]; + assert!(eq_symbols(follow_set.get("S").unwrap(), expected.as_slice())); + + let expected = vec![term!(A)]; + assert!(eq_symbols(follow_set.get("A").unwrap(), expected.as_slice())); + + let expected = vec![term!(B), RuleElem::EOF]; + assert!(eq_symbols(follow_set.get("B").unwrap(), expected.as_slice())); + + let expected = vec![]; + assert!(eq_symbols(follow_set.get("C").unwrap(), expected.as_slice())); + } +} From 0c2592609d465e87b6176b5eccf566204efb1f8f Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 9 Nov 2024 21:53:37 +0900 Subject: [PATCH 05/48] =?UTF-8?q?[add]=20DirectorSet=20=E6=A7=8B=E9=80=A0?= =?UTF-8?q?=E4=BD=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/cfg/src/rule.rs | 15 +- crates/parse_lr_common/src/rule.rs | 2 + crates/parse_lr_common/src/rule/director.rs | 170 ++++++++++++++++++++ crates/parse_lr_common/src/rule/follow.rs | 2 +- 4 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 crates/parse_lr_common/src/rule/director.rs diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index f513644..9c6586d 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -11,7 +11,7 @@ where fn as_rules(&self) -> Vec>; } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Eq)] pub struct Rule { pub id: usize, pub lhs: RuleElem, @@ -24,6 +24,19 @@ impl From<(RuleElem, Vec>)> for Rule { } } +impl PartialEq for Rule { + fn eq(&self, other: &Self) -> bool { + self.lhs == other.lhs && self.rhs == other.rhs + } +} + +impl Hash for Rule { + fn hash(&self, state: &mut H) { + self.lhs.hash(state); + self.rhs.hash(state); + } +} + impl Rule { pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem> { let mut l_nonterms = vec![&self.lhs]; diff --git a/crates/parse_lr_common/src/rule.rs b/crates/parse_lr_common/src/rule.rs index 2d8ef97..6eb0269 100644 --- a/crates/parse_lr_common/src/rule.rs +++ b/crates/parse_lr_common/src/rule.rs @@ -1,5 +1,7 @@ mod first; mod follow; +mod director; pub use first::FirstSet; pub use follow::FollowSet; +pub use director::DirectorSet; diff --git a/crates/parse_lr_common/src/rule/director.rs b/crates/parse_lr_common/src/rule/director.rs new file mode 100644 index 0000000..4e804bb --- /dev/null +++ b/crates/parse_lr_common/src/rule/director.rs @@ -0,0 +1,170 @@ +use std::collections::{HashMap, HashSet}; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleSet}; + +use crate::rule::{FirstSet, FollowSet}; + +pub struct DirectorSet<'a, T: TokenTag> { + map: HashMap<&'a Rule, Vec<&'a RuleElem>>, + _ruleset: &'a RuleSet, +} + +impl<'a, T: TokenTag> From<&'a RuleSet> for DirectorSet<'a, T> { + fn from(ruleset: &'a RuleSet) -> Self { + let build = DirectorSetBuilder::from(ruleset).calc(); + let map = build.map + .into_iter() + .map(|(k, v)| (k, v.into_iter().collect())) + .collect(); + + DirectorSet { + map, + _ruleset: ruleset, + } + } +} + +impl <'a, T: TokenTag> DirectorSet<'a, T> { + pub fn get(&self, rule: &Rule) -> Option<&[&'a RuleElem]> { + self.map.get(rule).map(|elems| elems.as_slice()) + } +} + +struct DirectorSetBuilder<'a, T: TokenTag> { + map: HashMap<&'a Rule, HashSet<&'a RuleElem>>, + ruleset: &'a RuleSet, + first_set: FirstSet<'a, T>, + follow_set: FollowSet<'a, T>, +} + +impl<'a, T: TokenTag> 
+impl<'a, T: TokenTag> From<&'a RuleSet<T>> for DirectorSetBuilder<'a, T> {
+    fn from(ruleset: &'a RuleSet<T>) -> Self {
+        let first_set = FirstSet::from(ruleset);
+        let follow_set = FollowSet::from(ruleset);
+
+        DirectorSetBuilder {
+            map: HashMap::new(),
+            ruleset,
+            first_set,
+            follow_set,
+        }
+    }
+}
+
+impl<'a, T: TokenTag> DirectorSetBuilder<'a, T> {
+    fn calc(mut self) -> Self {
+        for rule in &self.ruleset.rules {
+            self.calc_once(rule);
+        }
+        self
+    }
+
+    fn calc_once(&mut self, rule: &'a Rule<T>) {
+        let lhs = match &rule.lhs {
+            RuleElem::NonTerm(s) => s.as_str(),
+            _ => unreachable!(),
+        };
+
+        let rhs_firsts = self.first_by(&rule.rhs);
+        let cand_elems = if !rhs_firsts.contains(&&RuleElem::Epsilon) {
+            rhs_firsts
+        } else {
+            let mut cand_elems = rhs_firsts;
+            cand_elems.extend_from_slice(self.follow_set.get(&lhs).unwrap());
+            cand_elems
+        };
+
+        let director_elems = cand_elems
+            .into_iter()
+            .filter(|&e| *e != RuleElem::Epsilon)
+            .collect();
+        self.map.insert(rule, director_elems);
+    }
+
+    fn first_by(&self, relems: &'a [RuleElem<T>]) -> Vec<&'a RuleElem<T>> {
+        if relems.is_empty() {
+            vec![&RuleElem::Epsilon]
+        } else {
+            match &relems[0] {
+                RuleElem::NonTerm(s) => self.first_set.get(s.as_str()).unwrap().to_vec(),
+                t@RuleElem::Term(_) => vec![t],
+                _ => vec![],
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use copager_cfg::token::TokenTag;
+    use copager_cfg::rule::{Rule, RuleTag, RuleElem};
+    use copager_lex::LexSource;
+    use copager_parse::ParseSource;
+
+    use super::DirectorSet;
+
+    #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+    enum TestToken {
+        #[token(r"a")]
+        A,
+        #[token(r"b")]
+        B,
+    }
+
+    #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+    enum TestRule {
+        #[default]
+        #[rule("<S> ::= <A> <B>")]
+        S,
+        #[rule("<A> ::= A")]
+        A,
+        #[rule("<B> ::= <S> B")]
+        B,
+        #[rule("<C> ::= ")]
+        C,
+    }
+
+    fn eq_symbols<T>(lhs: &[&RuleElem<T>], rhs: &[RuleElem<T>]) -> bool
+    where
+        T: TokenTag,
+    {
+        if lhs.len() != rhs.len() {
+            println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+            return false;
+        }
+        for lelem in lhs {
+            if !rhs.contains(lelem) {
+                println!("lhs: {:?}, rhs: {:?}", lhs, rhs);
+                return false;
+            }
+        }
+        return true;
+    }
+
+    #[test]
+    fn follow_set() {
+        macro_rules! term {
+            ($expr:ident) => { RuleElem::new_term(TestToken::$expr) };
+        }
+
+        let ruleset = TestRule::default().into_ruleset();
+        let director_set = DirectorSet::from(&ruleset);
+
+        let rule = &TestRule::S.as_rules()[0];
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice()));
+
+        let rule = &TestRule::A.as_rules()[0];
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice()));
+
+        let rule = &TestRule::B.as_rules()[0];
+        let expected = vec![term!(A)];
+        assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice()));
+
+        let rule = &TestRule::C.as_rules()[0];
+        let expected = vec![];
+        assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice()));
+    }
+}
diff --git a/crates/parse_lr_common/src/rule/follow.rs b/crates/parse_lr_common/src/rule/follow.rs
index fbe4e4b..cba7914 100644
--- a/crates/parse_lr_common/src/rule/follow.rs
+++ b/crates/parse_lr_common/src/rule/follow.rs
@@ -26,7 +26,7 @@ impl<'a, T: TokenTag> From<&'a RuleSet<T>> for FollowSet<'a, T> {
 }
 
 impl<'a, T: TokenTag> FollowSet<'a, T> {
-    pub fn get(&self, nonterm: &str) -> Option<&[&RuleElem<T>]> {
+    pub fn get(&self, nonterm: &str) -> Option<&[&'a RuleElem<T>]> {
         self.map.get(nonterm).map(|terms| terms.as_slice())
     }
 }
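For reference, the director set (the LL(1) lookahead set) of a rule A ::= α computed here is FIRST(α), extended with FOLLOW(A) when α can derive ε, minus the ε marker itself. Worked on the test grammar: director(<S> ::= <A> <B>) = FIRST(<A>) = {a}; director(<A> ::= A) = {a}; director(<B> ::= <S> B) = FIRST(<S>) = {a}; and director(<C> ::= ε) = FOLLOW(<C>) = {}, since <C> never appears in any right-hand side. These are exactly the four assertions in the test above. A grammar is LL(1) precisely when rules sharing a left-hand side have pairwise disjoint director sets.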
term { + ($expr:ident) => { RuleElem::new_term(TestToken::$expr) }; + } + + let ruleset = TestRule::default().into_ruleset(); + let director_set = DirectorSet::from(&ruleset); + + let rule = &TestRule::S.as_rules()[0]; + let expected = vec![term!(A)]; + assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice())); + + let rule = &TestRule::A.as_rules()[0]; + let expected = vec![term!(A)]; + assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice())); + + let rule = &TestRule::B.as_rules()[0]; + let expected = vec![term!(A)]; + assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice())); + + let rule = &TestRule::C.as_rules()[0]; + let expected = vec![]; + assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice())); + } +} diff --git a/crates/parse_lr_common/src/rule/follow.rs b/crates/parse_lr_common/src/rule/follow.rs index fbe4e4b..cba7914 100644 --- a/crates/parse_lr_common/src/rule/follow.rs +++ b/crates/parse_lr_common/src/rule/follow.rs @@ -26,7 +26,7 @@ impl<'a, T: TokenTag> From<&'a RuleSet> for FollowSet<'a, T> { } impl<'a, T: TokenTag> FollowSet<'a, T> { - pub fn get(&self, nonterm: &str) -> Option<&[&RuleElem]> { + pub fn get(&self, nonterm: &str) -> Option<&[&'a RuleElem]> { self.map.get(nonterm).map(|terms| terms.as_slice()) } } From 19b7896bad0f78fbfbb19d5faad116730cba5b62 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 9 Nov 2024 22:01:42 +0900 Subject: [PATCH 06/48] =?UTF-8?q?[clean]=20parse=5Flr=5Fcommon=20=E5=86=85?= =?UTF-8?q?=E3=81=A7=E3=81=AE=20gen=5Fblocks=20=E6=9C=89=E5=8A=B9=E5=8C=96?= =?UTF-8?q?=E3=82=92=E7=84=A1=E3=81=97=E3=81=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs index 20d9298..90d8760 100644 --- a/crates/parse_lr_common/src/lib.rs +++ b/crates/parse_lr_common/src/lib.rs @@ -1,3 +1 @@ -#![feature(gen_blocks)] - pub mod rule; From a6bebd2b594eaa25dfd0240b633ecb68c2496cdb Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 9 Nov 2024 22:03:21 +0900 Subject: [PATCH 07/48] [change] parse_lr_common -> parse_common --- Cargo.lock | 22 +++++++++---------- Cargo.toml | 2 +- .../Cargo.toml | 2 +- .../src/lib.rs | 0 .../src/rule.rs | 0 .../src/rule/director.rs | 0 .../src/rule/first.rs | 0 .../src/rule/follow.rs | 0 8 files changed, 13 insertions(+), 13 deletions(-) rename crates/{parse_lr_common => parse_common}/Cargo.toml (90%) rename crates/{parse_lr_common => parse_common}/src/lib.rs (100%) rename crates/{parse_lr_common => parse_common}/src/rule.rs (100%) rename crates/{parse_lr_common => parse_common}/src/rule/director.rs (100%) rename crates/{parse_lr_common => parse_common}/src/rule/first.rs (100%) rename crates/{parse_lr_common => parse_common}/src/rule/follow.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 0479efb..33dc589 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -167,6 +167,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "copager_parse_common" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "copager_parse", + "thiserror", +] + [[package]] name = "copager_parse_derive" version = "0.2.0" @@ -198,17 +209,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "copager_parse_lr_common" -version = "0.2.0" -dependencies = [ - "anyhow", - "copager_cfg", - "copager_lex", - "copager_parse", - "thiserror", -] - [[package]] name = 
"copager_utils" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index c155b5b..8b6c49b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,8 +56,8 @@ members = [ "./crates/lex_derive", "./crates/lex_regex", "./crates/parse", + "./crates/parse_common", "./crates/parse_derive", - "./crates/parse_lr_common", "./crates/parse_lr1", "./crates/ir", "./crates/ir_void", diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_common/Cargo.toml similarity index 90% rename from crates/parse_lr_common/Cargo.toml rename to crates/parse_common/Cargo.toml index cbf10b2..0351c48 100644 --- a/crates/parse_lr_common/Cargo.toml +++ b/crates/parse_common/Cargo.toml @@ -1,7 +1,7 @@ cargo-features = ["edition2024"] [package] -name = "copager_parse_lr_common" +name = "copager_parse_common" version = "0.2.0" edition = "2024" diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_common/src/lib.rs similarity index 100% rename from crates/parse_lr_common/src/lib.rs rename to crates/parse_common/src/lib.rs diff --git a/crates/parse_lr_common/src/rule.rs b/crates/parse_common/src/rule.rs similarity index 100% rename from crates/parse_lr_common/src/rule.rs rename to crates/parse_common/src/rule.rs diff --git a/crates/parse_lr_common/src/rule/director.rs b/crates/parse_common/src/rule/director.rs similarity index 100% rename from crates/parse_lr_common/src/rule/director.rs rename to crates/parse_common/src/rule/director.rs diff --git a/crates/parse_lr_common/src/rule/first.rs b/crates/parse_common/src/rule/first.rs similarity index 100% rename from crates/parse_lr_common/src/rule/first.rs rename to crates/parse_common/src/rule/first.rs diff --git a/crates/parse_lr_common/src/rule/follow.rs b/crates/parse_common/src/rule/follow.rs similarity index 100% rename from crates/parse_lr_common/src/rule/follow.rs rename to crates/parse_common/src/rule/follow.rs From 9afc03353590c230f23857795702d2678f26ea11 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 9 Nov 2024 22:07:15 +0900 Subject: [PATCH 08/48] =?UTF-8?q?[add]=20dev=20=E3=83=95=E3=83=A9=E3=82=B0?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 + Cargo.toml | 11 ++++++++++- README.md | 1 + src/lib.rs | 5 +++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 33dc589..ecbe82f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,6 +32,7 @@ dependencies = [ "copager_lex", "copager_lex_regex", "copager_parse", + "copager_parse_common", "copager_parse_lr1", "example_lang_arithmetic", "example_lang_json", diff --git a/Cargo.toml b/Cargo.toml index 8b6c49b..1aa8fe5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ copager_cfg = { path = "./crates/cfg" } copager_lex = { path = "./crates/lex", optional = true } copager_lex_regex = { path = "./crates/lex_regex", optional = true } copager_parse = { path = "./crates/parse", optional = true } +copager_parse_common = { path = "./crates/parse_common", optional = true } copager_parse_lr1 = { path = "./crates/parse_lr1", optional = true } copager_ir = { path = "./crates/ir" } copager_ir_void = { path = "./crates/ir_void", optional = true } @@ -29,11 +30,19 @@ example_lang_pl0 = { path = "./examples/lang_pl0" } example_lang_xml = { path = "./examples/lang_xml" } [features] +# all +all = [ + "prebuild", "derive", "dev", # common + "regexlex", # lex + "lr1", # parse + "void", "sexp" # ir +] + # common default = ["dep:copager_lex", "dep:copager_parse"] -all = ["prebuild", "derive", 
"regexlex", "lr1", "void", "sexp"] prebuild = ["dep:serde_json"] derive = ["copager_lex/derive", "copager_parse/derive"] +dev = ["dep:copager_parse_common"] # lex regexlex = ["dep:copager_lex_regex"] diff --git a/README.md b/README.md index 2a6ade1..bf2559e 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ Rust製パーサジェネレータ - `all` - `derive` - `prebuild` +- `dev` ### Lex diff --git a/src/lib.rs b/src/lib.rs index 5b802dc..7a458e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,3 +34,8 @@ pub mod prelude { pub use copager_cfg::rule::{RuleTag, Rule, RuleElem}; pub use copager_cfg::token::TokenTag; } + +#[cfg(feature = "dev")] +pub mod dev { + pub use copager_parse_common::*; +} From dd8f500e71e5e73271a4dc2ec49dbcb0c2092a75 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 9 Nov 2024 22:21:40 +0900 Subject: [PATCH 09/48] =?UTF-8?q?[add]=20parse=5Flr=5Fcommon=20=E4=BB=AE?= =?UTF-8?q?=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 8 ++++++++ Cargo.toml | 1 + crates/parse_lr_common/Cargo.toml | 10 ++++++++++ crates/parse_lr_common/src/lib.rs | 0 4 files changed, 19 insertions(+) create mode 100644 crates/parse_lr_common/Cargo.toml create mode 100644 crates/parse_lr_common/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index ecbe82f..4912b66 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -210,6 +210,14 @@ dependencies = [ "thiserror", ] +[[package]] +name = "copager_parse_lr_common" +version = "0.2.0" +dependencies = [ + "anyhow", + "thiserror", +] + [[package]] name = "copager_utils" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index 1aa8fe5..2cc5faa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,6 +67,7 @@ members = [ "./crates/parse", "./crates/parse_common", "./crates/parse_derive", + "./crates/parse_lr_common", "./crates/parse_lr1", "./crates/ir", "./crates/ir_void", diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml new file mode 100644 index 0000000..c65bf1d --- /dev/null +++ b/crates/parse_lr_common/Cargo.toml @@ -0,0 +1,10 @@ +cargo-features = ["edition2024"] + +[package] +name = "copager_parse_lr_common" +version = "0.2.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs new file mode 100644 index 0000000..e69de29 From 0ecf2af657a8bada4a569f088bb86b8dcf31c6fe Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 9 Nov 2024 22:25:20 +0900 Subject: [PATCH 10/48] [change] parse_lr1 -> parse_lr_lr1 --- Cargo.lock | 26 +++++++++---------- Cargo.toml | 6 ++--- crates/core/Cargo.toml | 2 +- crates/core/tests/prebuild.rs | 2 +- crates/core/tests/simple.rs | 2 +- crates/core/tests/simple_multiple.rs | 2 +- crates/ir_sexp/Cargo.toml | 2 +- crates/ir_sexp/tests/simple.rs | 2 +- crates/{parse_lr1 => parse_lr_lr1}/Cargo.toml | 4 +-- .../src/builder.rs | 0 .../{parse_lr1 => parse_lr_lr1}/src/error.rs | 0 crates/{parse_lr1 => parse_lr_lr1}/src/lib.rs | 0 .../tests/simple.rs | 2 +- src/lib.rs | 2 +- 14 files changed, 26 insertions(+), 26 deletions(-) rename crates/{parse_lr1 => parse_lr_lr1}/Cargo.toml (88%) rename crates/{parse_lr1 => parse_lr_lr1}/src/builder.rs (100%) rename crates/{parse_lr1 => parse_lr_lr1}/src/error.rs (100%) rename crates/{parse_lr1 => parse_lr_lr1}/src/lib.rs (100%) rename crates/{parse_lr1 => parse_lr_lr1}/tests/simple.rs (98%) diff --git a/Cargo.lock b/Cargo.lock index 4912b66..d36aca8 100644 --- a/Cargo.lock +++ 
From dd8f500e71e5e73271a4dc2ec49dbcb0c2092a75 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sat, 9 Nov 2024 22:21:40 +0900
Subject: [PATCH 09/48] [add] Provisionally create parse_lr_common
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock                        |  8 ++++++++
 Cargo.toml                        |  1 +
 crates/parse_lr_common/Cargo.toml | 10 ++++++++++
 crates/parse_lr_common/src/lib.rs |  0
 4 files changed, 19 insertions(+)
 create mode 100644 crates/parse_lr_common/Cargo.toml
 create mode 100644 crates/parse_lr_common/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index ecbe82f..4912b66 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -210,6 +210,14 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "copager_parse_lr_common"
+version = "0.2.0"
+dependencies = [
+ "anyhow",
+ "thiserror",
+]
+
 [[package]]
 name = "copager_utils"
 version = "0.1.1"
diff --git a/Cargo.toml b/Cargo.toml
index 1aa8fe5..2cc5faa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -67,6 +67,7 @@ members = [
     "./crates/parse",
     "./crates/parse_common",
     "./crates/parse_derive",
+    "./crates/parse_lr_common",
     "./crates/parse_lr1",
     "./crates/ir",
     "./crates/ir_void",
diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml
new file mode 100644
index 0000000..c65bf1d
--- /dev/null
+++ b/crates/parse_lr_common/Cargo.toml
@@ -0,0 +1,10 @@
+cargo-features = ["edition2024"]
+
+[package]
+name = "copager_parse_lr_common"
+version = "0.2.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs
new file mode 100644
index 0000000..e69de29
From 0ecf2af657a8bada4a569f088bb86b8dcf31c6fe Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sat, 9 Nov 2024 22:25:20 +0900
Subject: [PATCH 10/48] [change] parse_lr1 -> parse_lr_lr1

---
 Cargo.lock                                      | 26 +++++++++----------
 Cargo.toml                                      |  6 ++---
 crates/core/Cargo.toml                          |  2 +-
 crates/core/tests/prebuild.rs                   |  2 +-
 crates/core/tests/simple.rs                     |  2 +-
 crates/core/tests/simple_multiple.rs            |  2 +-
 crates/ir_sexp/Cargo.toml                       |  2 +-
 crates/ir_sexp/tests/simple.rs                  |  2 +-
 crates/{parse_lr1 => parse_lr_lr1}/Cargo.toml   |  4 +--
 .../src/builder.rs                              |  0
 .../src/error.rs                                |  0
 .../src/lib.rs                                  |  0
 .../tests/simple.rs                             |  2 +-
 src/lib.rs                                      |  2 +-
 14 files changed, 26 insertions(+), 26 deletions(-)
 rename crates/{parse_lr1 => parse_lr_lr1}/Cargo.toml (88%)
 rename crates/{parse_lr1 => parse_lr_lr1}/src/builder.rs (100%)
 rename crates/{parse_lr1 => parse_lr_lr1}/src/error.rs (100%)
 rename crates/{parse_lr1 => parse_lr_lr1}/src/lib.rs (100%)
 rename crates/{parse_lr1 => parse_lr_lr1}/tests/simple.rs (98%)

diff --git a/Cargo.lock b/Cargo.lock
index 4912b66..d36aca8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -33,7 +33,7 @@ dependencies = [
  "copager_lex_regex",
  "copager_parse",
  "copager_parse_common",
- "copager_parse_lr1",
+ "copager_parse_lr_lr1",
 "example_lang_arithmetic",
 "example_lang_json",
 "example_lang_pl0",
@@ -62,7 +62,7 @@ dependencies = [
 "copager_lex",
 "copager_lex_regex",
 "copager_parse",
- "copager_parse_lr1",
+ "copager_parse_lr_lr1",
 "copager_utils",
 "serde",
 "serde_cbor",
@@ -102,7 +102,7 @@ dependencies = [
 "copager_lex",
 "copager_lex_regex",
 "copager_parse",
- "copager_parse_lr1",
+ "copager_parse_lr_lr1",
 "thiserror",
 ]
 
@@ -194,7 +194,15 @@ dependencies = [
 ]
 
 [[package]]
-name = "copager_parse_lr1"
+name = "copager_parse_lr_common"
+version = "0.2.0"
+dependencies = [
+ "anyhow",
+ "thiserror",
+]
+
+[[package]]
+name = "copager_parse_lr_lr1"
 version = "0.2.0"
 dependencies = [
  "anyhow",
@@ -203,21 +211,13 @@ dependencies = [
  "copager_lex",
  "copager_lex_regex",
  "copager_parse",
- "copager_parse_lr1",
+ "copager_parse_lr_lr1",
  "copager_utils",
  "itertools",
  "serde",
  "thiserror",
 ]
 
-[[package]]
-name = "copager_parse_lr_common"
-version = "0.2.0"
-dependencies = [
- "anyhow",
- "thiserror",
-]
-
 [[package]]
 name = "copager_utils"
 version = "0.1.1"
diff --git a/Cargo.toml b/Cargo.toml
index 2cc5faa..8f0e838 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@ copager_lex = { path = "./crates/lex", optional = true }
 copager_lex_regex = { path = "./crates/lex_regex", optional = true }
 copager_parse = { path = "./crates/parse", optional = true }
 copager_parse_common = { path = "./crates/parse_common", optional = true }
-copager_parse_lr1 = { path = "./crates/parse_lr1", optional = true }
+copager_parse_lr_lr1 = { path = "./crates/parse_lr_lr1", optional = true }
 copager_ir = { path = "./crates/ir" }
 copager_ir_void = { path = "./crates/ir_void", optional = true }
@@ -48,7 +48,7 @@ dev = ["dep:copager_parse_common"]
 regexlex = ["dep:copager_lex_regex"]
 
 # parse
-lr1 = ["dep:copager_parse_lr1"]
+lr1 = ["dep:copager_parse_lr_lr1"]
 
 # ir
 void = ["dep:copager_ir_void"]
@@ -68,7 +68,7 @@ members = [
     "./crates/parse_common",
     "./crates/parse_derive",
     "./crates/parse_lr_common",
-    "./crates/parse_lr1",
+    "./crates/parse_lr_lr1",
     "./crates/ir",
     "./crates/ir_void",
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index 6d8bf34..6f5d292 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -19,5 +19,5 @@
 copager_core = { path = "." }
 copager_lex = { path = "../lex", features = ["derive"]}
 copager_lex_regex = { path = "../lex_regex" }
 copager_parse = { path = "../parse", features = ["derive"] }
-copager_parse_lr1 = { path = "../parse_lr1" }
+copager_parse_lr_lr1 = { path = "../parse_lr_lr1" }
 copager_ir_void = { path = "../ir_void" }
diff --git a/crates/core/tests/prebuild.rs b/crates/core/tests/prebuild.rs
index af7db01..2812df9 100644
--- a/crates/core/tests/prebuild.rs
+++ b/crates/core/tests/prebuild.rs
@@ -8,7 +8,7 @@ use copager_cfg::rule::{RuleTag, Rule, RuleElem};
 use copager_lex::LexSource;
 use copager_lex_regex::RegexLexer;
 use copager_parse::ParseSource;
-use copager_parse_lr1::LR1;
+use copager_parse_lr_lr1::LR1;
 use copager_ir_void::Void;
 
 #[derive(
diff --git a/crates/core/tests/simple.rs b/crates/core/tests/simple.rs
index 4a62fd6..c65b85a 100644
--- a/crates/core/tests/simple.rs
+++ b/crates/core/tests/simple.rs
@@ -6,7 +6,7 @@ use copager_cfg::rule::{RuleTag, Rule, RuleElem};
 use copager_lex::LexSource;
 use copager_lex_regex::RegexLexer;
 use copager_parse::ParseSource;
-use copager_parse_lr1::LR1;
+use copager_parse_lr_lr1::LR1;
 use copager_ir_void::Void;
 
 #[derive(
diff --git a/crates/core/tests/simple_multiple.rs b/crates/core/tests/simple_multiple.rs
index 5e8ebc0..8697c12 100644
--- a/crates/core/tests/simple_multiple.rs
+++ b/crates/core/tests/simple_multiple.rs
@@ -6,7 +6,7 @@ use copager_cfg::rule::{RuleTag, Rule, RuleElem};
 use copager_lex::LexSource;
 use copager_lex_regex::RegexLexer;
 use copager_parse::ParseSource;
-use copager_parse_lr1::LR1;
+use copager_parse_lr_lr1::LR1;
 use copager_ir_void::Void;
 
 #[derive(
diff --git a/crates/ir_sexp/Cargo.toml b/crates/ir_sexp/Cargo.toml
index b3e15ca..72e5cef 100644
--- a/crates/ir_sexp/Cargo.toml
+++ b/crates/ir_sexp/Cargo.toml
@@ -15,5 +15,5 @@ copager_ir = { path = "../ir" }
 copager_lex = { path = "../lex", features = ["derive"] }
 copager_lex_regex = { path = "../lex_regex" }
 copager_parse = { path = "../parse", features = ["derive"] }
-copager_parse_lr1 = { path = "../parse_lr1" }
+copager_parse_lr_lr1 = { path = "../parse_lr_lr1" }
 copager_ir_sexp = { path = "." }
diff --git a/crates/ir_sexp/tests/simple.rs b/crates/ir_sexp/tests/simple.rs
index 0f42f78..126e686 100644
--- a/crates/ir_sexp/tests/simple.rs
+++ b/crates/ir_sexp/tests/simple.rs
@@ -3,7 +3,7 @@ use copager_cfg::rule::{RuleTag, Rule, RuleElem};
 use copager_lex::{LexSource, LexDriver};
 use copager_lex_regex::RegexLexer;
 use copager_parse::{ParseSource, ParseDriver, ParseEvent};
-use copager_parse_lr1::LR1;
+use copager_parse_lr_lr1::LR1;
 use copager_ir::{IR, IRBuilder};
 use copager_ir_sexp::SExp;
 
diff --git a/crates/parse_lr1/Cargo.toml b/crates/parse_lr_lr1/Cargo.toml
similarity index 88%
rename from crates/parse_lr1/Cargo.toml
rename to crates/parse_lr_lr1/Cargo.toml
index 3d628d4..c5093de 100644
--- a/crates/parse_lr1/Cargo.toml
+++ b/crates/parse_lr_lr1/Cargo.toml
@@ -1,7 +1,7 @@
 cargo-features = ["edition2024"]
 
 [package]
-name = "copager_parse_lr1"
+name = "copager_parse_lr_lr1"
 version = "0.2.0"
 edition = "2024"
 
@@ -20,4 +20,4 @@ copager_utils = { path = "../utils" }
 copager_lex = { path = "../lex", features = ["derive"] }
 copager_lex_regex = { path = "../lex_regex" }
 copager_parse = { path = "../parse", features = ["derive"] }
-copager_parse_lr1 = { path = "../parse_lr1" }
+copager_parse_lr_lr1 = { path = "./" }
diff --git a/crates/parse_lr1/src/builder.rs b/crates/parse_lr_lr1/src/builder.rs
similarity index 100%
rename from crates/parse_lr1/src/builder.rs
rename to crates/parse_lr_lr1/src/builder.rs
diff --git a/crates/parse_lr1/src/error.rs b/crates/parse_lr_lr1/src/error.rs
similarity index 100%
rename from crates/parse_lr1/src/error.rs
rename to crates/parse_lr_lr1/src/error.rs
diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr_lr1/src/lib.rs
similarity index 100%
rename from crates/parse_lr1/src/lib.rs
rename to crates/parse_lr_lr1/src/lib.rs
diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr_lr1/tests/simple.rs
similarity index 98%
rename from crates/parse_lr1/tests/simple.rs
rename to crates/parse_lr_lr1/tests/simple.rs
index 1acd706..393b935 100644
--- a/crates/parse_lr1/tests/simple.rs
+++ b/crates/parse_lr_lr1/tests/simple.rs
@@ -5,7 +5,7 @@ use copager_cfg::rule::{RuleTag, Rule, RuleElem};
 use copager_lex::{LexSource, LexDriver};
 use copager_lex_regex::RegexLexer;
 use copager_parse::{ParseSource, ParseDriver, ParseEvent};
-use copager_parse_lr1::LR1;
+use copager_parse_lr_lr1::LR1;
 
 #[derive(
     Debug, Default, Copy, Clone, Hash, PartialEq, Eq,
diff --git a/src/lib.rs b/src/lib.rs
index 7a458e0..9f32e7b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -19,7 +19,7 @@ pub mod lex {
 pub mod parse {
     pub use copager_parse::*;
     #[cfg(feature = "lr1")]
-    pub use copager_parse_lr1::*;
+    pub use copager_parse_lr_lr1::*;
 }
 
 pub mod ir {
From 60512f6c3ec4604740177108beca1da72cadae7e Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 10 Nov 2024 00:45:43 +0900
Subject: [PATCH 11/48] [add] Implement LR(0)-related structs such as LR0DFA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- LR0Item
- LR0ItemSet
- LR0DFANode
- LR0DFA
---
 Cargo.lock                                  |   1 +
 crates/parse_lr_common/Cargo.toml           |   1 +
 crates/parse_lr_common/src/lib.rs           |   3 +
 crates/parse_lr_common/src/lr0.rs           |   4 +
 crates/parse_lr_common/src/lr0/automaton.rs |  89 ++++++++++++++
 crates/parse_lr_common/src/lr0/item.rs      | 106 ++++++++++++++++++
 6 files changed, 204 insertions(+)
 create mode 100644 crates/parse_lr_common/src/lr0.rs
 create mode 100644 crates/parse_lr_common/src/lr0/automaton.rs
 create mode 100644 crates/parse_lr_common/src/lr0/item.rs

diff --git a/Cargo.lock b/Cargo.lock
index d36aca8..5665fa3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -198,6 +198,7 @@ name = "copager_parse_lr_common"
 version = "0.2.0"
 dependencies = [
  "anyhow",
+ "copager_cfg",
  "thiserror",
 ]
 
diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml
index c65bf1d..4498411 100644
--- a/crates/parse_lr_common/Cargo.toml
+++ b/crates/parse_lr_common/Cargo.toml
@@ -8,3 +8,4 @@ edition = "2024"
 [dependencies]
 anyhow = { workspace = true }
 thiserror = { workspace = true }
+copager_cfg = { path = "../cfg" }
diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs
index e69de29..33181fe 100644
--- a/crates/parse_lr_common/src/lib.rs
+++ b/crates/parse_lr_common/src/lib.rs
@@ -0,0 +1,3 @@
+#![feature(gen_blocks)]
+
+pub mod lr0;
diff --git a/crates/parse_lr_common/src/lr0.rs b/crates/parse_lr_common/src/lr0.rs
new file mode 100644
index 0000000..739cd37
--- /dev/null
+++ b/crates/parse_lr_common/src/lr0.rs
@@ -0,0 +1,4 @@
+pub mod item;
+pub mod automaton;
+
+pub use automaton::LR0DFA;
diff --git a/crates/parse_lr_common/src/lr0/automaton.rs b/crates/parse_lr_common/src/lr0/automaton.rs
new file mode 100644
index 0000000..33b4634
--- /dev/null
+++ b/crates/parse_lr_common/src/lr0/automaton.rs
@@ -0,0 +1,89 @@
+use std::marker::PhantomData;
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleElem, RuleSet};
+
+use crate::lr0::item::LR0ItemSet;
+
+#[derive(Debug)]
+pub struct LR0DFANode<'a, T: TokenTag> {
+    id: usize,
+    pub itemset: LR0ItemSet<'a, T>,
+    pub next: Vec<(&'a RuleElem<T>, Box<Self>)>, // (cond, next_node)
+}
+
+#[derive(Debug)]
+pub struct LR0DFA<'a, T: TokenTag> {
+    nodes: usize,
+    pub top: LR0DFANode<'a, T>,
+}
+
+impl<'a, T: TokenTag> From<&'a RuleSet<T>> for LR0DFA<'a, T> {
+    fn from(ruleset: &'a RuleSet<T>) -> Self {
+        let (nodes, top) = LR0DFABuilder::new().start(ruleset);
+        LR0DFA { nodes, top }
+    }
+}
+
+impl<'a, T: TokenTag> LR0DFA<'a, T> {
+    pub fn all_nodes(&self) -> usize {
+        self.nodes
+    }
+
+    pub gen fn all_edges(&self) -> (usize, usize, &'a RuleElem<T>) {
+        let mut stack = vec![&self.top];
+        while let Some(node) = stack.pop() {
+            for (cond, next_node) in &node.next {
+                stack.push(next_node);
+                yield (node.id, next_node.id, *cond);
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+struct LR0DFABuilder<T> {
+    nodes: usize,
+    _phantom: PhantomData<T>,
+}
+
+impl<'a, T: TokenTag> LR0DFABuilder<T> {
+    fn new() -> Self {
+        LR0DFABuilder {
+            nodes: 0,
+            _phantom: PhantomData,
+        }
+    }
+
+    fn start(mut self, ruleset: &'a RuleSet<T>) -> (usize, LR0DFANode<'a, T>) {
+        let top = RuleElem::NonTerm(ruleset.top.clone());
+        let top = ruleset.rules
+            .iter()
+            .find(|rule| rule.lhs == top)
+            .unwrap();
+        let top = LR0ItemSet::from(ruleset).init(top);
+
+        (self.nodes, self.gen_recursive(top))
+    }
+
+    fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T>) -> LR0DFANode<'a, T>
+    where
+        T: TokenTag,
+    {
+        let id = self.nodes;
+        let next = itemset
+            .gen_next_sets()
+            .map(|(cond, next_items)| {
+                (cond, Box::new(self.gen_recursive(next_items)))
+            })
+            .collect();
+        self.nodes += 1;
+
+        LR0DFANode { id, itemset, next }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}
diff --git a/crates/parse_lr_common/src/lr0/item.rs b/crates/parse_lr_common/src/lr0/item.rs
new file mode 100644
index 0000000..f90ab9a
--- /dev/null
+++ b/crates/parse_lr_common/src/lr0/item.rs
@@ -0,0 +1,106 @@
+use std::collections::{HashMap, HashSet};
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{Rule, RuleElem, RuleSet};
+
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+pub struct LR0Item<'a, T: TokenTag> {
+    rule: &'a Rule<T>,
+    dot_pos: usize,
+}
+
+impl<'a, T: TokenTag> From<&'a Rule<T>> for LR0Item<'a, T> {
+    fn from(rule: &'a Rule<T>) -> Self {
+        LR0Item { rule, dot_pos: 0 }
+    }
+}
+
+impl<'a, T: TokenTag> LR0Item<'a, T> {
+    pub fn gen_next(&self) -> Self {
+        assert!(self.dot_pos + 1 <= self.rule.rhs.len());
+        LR0Item {
+            rule: self.rule,
+            dot_pos: self.dot_pos + 1,
+        }
+    }
+
+    pub fn check_next_elem(&self) -> Option<&'a RuleElem<T>> {
+        if self.dot_pos < self.rule.rhs.len() {
+            Some(&self.rule.rhs[self.dot_pos])
+        } else {
+            None
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct LR0ItemSet<'a, T: TokenTag> {
+    items: HashSet<LR0Item<'a, T>>,
+    ruleset: &'a RuleSet<T>,
+}
+
+impl<'a, T: TokenTag> From<&'a RuleSet<T>> for LR0ItemSet<'a, T> {
+    fn from(ruleset: &'a RuleSet<T>) -> Self {
+        LR0ItemSet {
+            items: HashSet::new(),
+            ruleset,
+        }
+    }
+}
+
+impl<'a, T: TokenTag> LR0ItemSet<'a, T> {
+    pub fn init(mut self, rule: &'a Rule<T>) -> Self {
+        self.items.insert(LR0Item::from(rule));
+        self
+    }
+
+    pub fn gen_next_sets(&mut self) -> impl Iterator<Item = (&'a RuleElem<T>, LR0ItemSet<'a, T>)> {
+        self.expand();
+
+        let mut next_set_candidates = HashMap::new();
+        self.items
+            .iter()
+            .filter_map(|item| item.check_next_elem().map(|nelem| (nelem, item)))
+            .for_each(|(nelem, item)| {
+                next_set_candidates
+                    .entry(nelem)
+                    .or_insert_with(HashSet::new)
+                    .insert(item.gen_next());
+            });
+
+        next_set_candidates
+            .into_iter()
+            .map(|(cond, items)|
+                (cond, LR0ItemSet { items, ruleset: self.ruleset })
+            )
+    }
+
+    fn expand(&mut self) {
+        let new_expaned = self.items
+            .iter()
+            .flat_map(|item| self.expand_once(item))
+            .flatten()
+            .collect::<Vec<_>>();
+        for item in new_expaned {
+            self.items.insert(item);
+        }
+    }
+
+    fn expand_once(&self, item: &LR0Item<'a, T>) -> Option<Vec<LR0Item<'a, T>>> {
+        if let Some(nonterm@RuleElem::NonTerm(..)) = item.check_next_elem() {
+            let items = self.ruleset
+                .find_rule(nonterm)
+                .into_iter()
+                .map(|rule| LR0Item::from(rule))
+                .collect();
+            Some(items)
+        } else {
+            None
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}
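LR0ItemSet::expand and gen_next_sets above compute the two core LR(0) operations: the closure of an item set (pull in every rule for a nonterminal that sits right after a dot) and the GOTO transitions (advance the dot over each possible next symbol). A minimal self-contained sketch of both, with a toy grammar and items as (rule index, dot position) pairs:

    use std::collections::{BTreeSet, HashMap};

    type Item = (usize, usize); // (rule index, dot position)

    fn main() {
        // 0: S' -> S, 1: S -> S a, 2: S -> b
        let rules: Vec<(&str, Vec<&str>)> = vec![
            ("S'", vec!["S"]),
            ("S", vec!["S", "a"]),
            ("S", vec!["b"]),
        ];
        let closure = |items: BTreeSet<Item>| -> BTreeSet<Item> {
            let mut set = items;
            loop {
                let mut grown = set.clone();
                for &(r, dot) in &set {
                    if let Some(&sym) = rules[r].1.get(dot) {
                        // Dot before a nonterminal: pull in all of its rules.
                        for (i, rule) in rules.iter().enumerate() {
                            if rule.0 == sym {
                                grown.insert((i, 0));
                            }
                        }
                    }
                }
                if grown.len() == set.len() { return set; }
                set = grown;
            }
        };
        // GOTO: advance the dot over each possible next symbol.
        let start = closure(BTreeSet::from([(0, 0)]));
        let mut goto: HashMap<&str, BTreeSet<Item>> = HashMap::new();
        for &(r, dot) in &start {
            if let Some(&sym) = rules[r].1.get(dot) {
                goto.entry(sym).or_default().insert((r, dot + 1));
            }
        }
        println!("I0 = {start:?}"); // {(0,0), (1,0), (2,0)}
        for (sym, items) in &goto {
            println!("goto(I0, {sym}) = {:?}", closure(items.clone()));
        }
    }

Unlike this sketch, the patch's LR0DFABuilder builds successor nodes eagerly and recursively; the gen fn all_edges iterator then walks the finished graph.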
From 51432267aef11316edd2e62ddbeff4442dc5c8ec Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 10 Nov 2024 00:48:22 +0900
Subject: [PATCH 12/48] [update] Include the parse_lr_common crate in the dev
 flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock | 1 +
 Cargo.toml | 3 ++-
 src/lib.rs | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5665fa3..a69f64c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -33,6 +33,7 @@ dependencies = [
  "copager_lex_regex",
  "copager_parse",
  "copager_parse_common",
+ "copager_parse_lr_common",
 "copager_parse_lr_lr1",
 "example_lang_arithmetic",
 "example_lang_json",
diff --git a/Cargo.toml b/Cargo.toml
index 8f0e838..d690add 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,7 @@ copager_lex = { path = "./crates/lex", optional = true }
 copager_lex_regex = { path = "./crates/lex_regex", optional = true }
 copager_parse = { path = "./crates/parse", optional = true }
 copager_parse_common = { path = "./crates/parse_common", optional = true }
+copager_parse_lr_common = { path = "./crates/parse_lr_common", optional = true }
 copager_parse_lr_lr1 = { path = "./crates/parse_lr_lr1", optional = true }
 copager_ir = { path = "./crates/ir" }
 copager_ir_void = { path = "./crates/ir_void", optional = true }
@@ -43,7 +43,7 @@ default = ["dep:copager_lex", "dep:copager_parse"]
 prebuild = ["dep:serde_json"]
 derive = ["copager_lex/derive", "copager_parse/derive"]
-dev = ["dep:copager_parse_common"]
+dev = ["dep:copager_parse_common", "dep:copager_parse_lr_common"]
 
 # lex
 regexlex = ["dep:copager_lex_regex"]
diff --git a/src/lib.rs b/src/lib.rs
index 9f32e7b..b3c112b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -38,4 +38,5 @@ pub mod prelude {
 #[cfg(feature = "dev")]
 pub mod dev {
     pub use copager_parse_common::*;
+    pub use copager_parse_lr_common as lr;
 }
path = "./crates/parse_lr_lr1", optional = true } copager_ir = { path = "./crates/ir" } copager_ir_void = { path = "./crates/ir_void", optional = true } @@ -42,7 +43,7 @@ all = [ default = ["dep:copager_lex", "dep:copager_parse"] prebuild = ["dep:serde_json"] derive = ["copager_lex/derive", "copager_parse/derive"] -dev = ["dep:copager_parse_common"] +dev = ["dep:copager_parse_common", "dep:copager_parse_lr_common"] # lex regexlex = ["dep:copager_lex_regex"] diff --git a/src/lib.rs b/src/lib.rs index 9f32e7b..b3c112b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,4 +38,5 @@ pub mod prelude { #[cfg(feature = "dev")] pub mod dev { pub use copager_parse_common::*; + pub use copager_parse_lr_common as lr; } From 0f595988c03ae3e644bb75ee923d1493f4d803a6 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sun, 10 Nov 2024 17:00:16 +0900 Subject: [PATCH 13/48] =?UTF-8?q?[fix]=20LR0Item=20=E3=81=A7=E3=81=AE=20Ep?= =?UTF-8?q?silon=20=E3=81=AE=E6=89=B1=E3=81=84=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/lr0/item.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/parse_lr_common/src/lr0/item.rs b/crates/parse_lr_common/src/lr0/item.rs index f90ab9a..85b21fe 100644 --- a/crates/parse_lr_common/src/lr0/item.rs +++ b/crates/parse_lr_common/src/lr0/item.rs @@ -11,7 +11,11 @@ pub struct LR0Item<'a, T: TokenTag> { impl<'a, T: TokenTag> From<&'a Rule> for LR0Item<'a, T> { fn from(rule: &'a Rule) -> Self { - LR0Item { rule, dot_pos: 0 } + if rule.rhs[0] == RuleElem::Epsilon { + LR0Item { rule, dot_pos: 1 } + } else { + LR0Item { rule, dot_pos: 0 } + } } } From 494a8f57b8936013676df25016ec7339e83e0eba Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sun, 10 Nov 2024 17:28:36 +0900 Subject: [PATCH 14/48] =?UTF-8?q?[update]=20LR0=20=E7=B3=BB=E6=A7=8B?= =?UTF-8?q?=E9=80=A0=E4=BD=93=E3=81=AE=E5=85=AC=E9=96=8B=E3=83=A1=E3=82=BD?= =?UTF-8?q?=E3=83=83=E3=83=89=E7=AD=89=E3=82=92=E8=AA=BF=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/lr0/automaton.rs | 55 +++++++++++++-------- crates/parse_lr_common/src/lr0/item.rs | 6 +-- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/crates/parse_lr_common/src/lr0/automaton.rs b/crates/parse_lr_common/src/lr0/automaton.rs index 33b4634..9733dec 100644 --- a/crates/parse_lr_common/src/lr0/automaton.rs +++ b/crates/parse_lr_common/src/lr0/automaton.rs @@ -1,43 +1,56 @@ +use std::rc::Rc; use std::marker::PhantomData; use copager_cfg::token::TokenTag; -use copager_cfg::rule::{RuleElem, RuleSet}; +use copager_cfg::rule::{Rule, RuleElem, RuleSet}; -use crate::lr0::item::LR0ItemSet; +use crate::lr0::item::{LR0Item, LR0ItemSet}; #[derive(Debug)] pub struct LR0DFANode<'a, T: TokenTag> { id: usize, pub itemset: LR0ItemSet<'a, T>, - pub next: Vec<(&'a RuleElem, Box)>, // (cond, next_node) + pub next: Vec<(&'a RuleElem, Rc)>, // (cond, next_node) +} + +impl<'a, T: TokenTag> LR0DFANode<'a, T> { + pub fn contains(&self, rule: &Rule) -> bool { + self.contains_by(|item| item.rule == rule) + } + + pub fn contains_by(&self, cond: F) -> bool + where + F: Fn(&LR0Item<'a, T>) -> bool + { + self.itemset + .items + .iter() + .any(cond) + } } #[derive(Debug)] pub struct LR0DFA<'a, T: TokenTag> { - nodes: usize, - pub top: LR0DFANode<'a, T>, + pub nodes: Vec>>, + pub edges: Vec<(usize, usize, &'a RuleElem)>, } impl<'a, T: TokenTag> From<&'a RuleSet> for LR0DFA<'a, T> { fn 
from(ruleset: &'a RuleSet) -> Self { - let (nodes, top) = LR0DFABuilder::new().start(ruleset); - LR0DFA { nodes, top } - } -} + let dfa_top = LR0DFABuilder::new().start(ruleset); -impl<'a, T: TokenTag> LR0DFA<'a, T> { - pub fn all_nodes(&self) -> usize { - self.nodes - } - - pub gen fn all_edges(&self) -> (usize, usize, &'a RuleElem) { - let mut stack = vec![&self.top]; + let mut nodes = vec![]; + let mut edges = vec![]; + let mut stack = vec![Rc::new(dfa_top)]; while let Some(node) = stack.pop() { + nodes.push(Rc::clone(&node)); for (cond, next_node) in &node.next { - stack.push(next_node); - yield (node.id, next_node.id, *cond); + edges.push((node.id, next_node.id, *cond)); + stack.push(Rc::clone(next_node)); } } + + LR0DFA { nodes, edges } } } @@ -55,7 +68,7 @@ impl<'a, T: TokenTag> LR0DFABuilder { } } - fn start(mut self, ruleset: &'a RuleSet) -> (usize, LR0DFANode<'a, T>) { + fn start(mut self, ruleset: &'a RuleSet) -> LR0DFANode<'a, T> { let top = RuleElem::NonTerm(ruleset.top.clone()); let top = ruleset.rules .iter() @@ -63,7 +76,7 @@ impl<'a, T: TokenTag> LR0DFABuilder { .unwrap(); let top = LR0ItemSet::from(ruleset).init(top); - (self.nodes, self.gen_recursive(top)) + self.gen_recursive(top) } fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T>) -> LR0DFANode<'a, T> @@ -74,7 +87,7 @@ impl<'a, T: TokenTag> LR0DFABuilder { let next = itemset .gen_next_sets() .map(|(cond, next_items) | { - (cond, Box::new(self.gen_recursive(next_items))) + (cond, Rc::new(self.gen_recursive(next_items))) }) .collect(); self.nodes += 1; diff --git a/crates/parse_lr_common/src/lr0/item.rs b/crates/parse_lr_common/src/lr0/item.rs index 85b21fe..06fb718 100644 --- a/crates/parse_lr_common/src/lr0/item.rs +++ b/crates/parse_lr_common/src/lr0/item.rs @@ -5,8 +5,8 @@ use copager_cfg::rule::{Rule, RuleElem, RuleSet}; #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] pub struct LR0Item<'a, T: TokenTag> { - rule: &'a Rule, - dot_pos: usize, + pub rule: &'a Rule, + pub dot_pos: usize, } impl<'a, T: TokenTag> From<&'a Rule> for LR0Item<'a, T> { @@ -39,7 +39,7 @@ impl<'a, T: TokenTag> LR0Item<'a, T> { #[derive(Debug)] pub struct LR0ItemSet<'a, T: TokenTag> { - items: HashSet>, + pub items: HashSet>, ruleset: &'a RuleSet, } From 2bd8da32f626da309df7044c8ae2e9d9b327b55f Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sun, 10 Nov 2024 17:33:40 +0900 Subject: [PATCH 15/48] =?UTF-8?q?[fix]=20LR0ItemSet::expand=20=E3=82=92?= =?UTF-8?q?=E5=8D=81=E5=88=86=E3=81=AA=E5=9B=9E=E6=95=B0=E5=AE=9F=E8=A1=8C?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/lr0/item.rs | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/crates/parse_lr_common/src/lr0/item.rs b/crates/parse_lr_common/src/lr0/item.rs index 06fb718..2c09740 100644 --- a/crates/parse_lr_common/src/lr0/item.rs +++ b/crates/parse_lr_common/src/lr0/item.rs @@ -80,24 +80,26 @@ impl<'a, T: TokenTag> LR0ItemSet<'a, T> { } fn expand(&mut self) { - let new_expaned = self.items - .iter() - .flat_map(|item| self.expand_once(item)) - .flatten() - .collect::>(); - for item in new_expaned { - self.items.insert(item); + let mut modified = true; + while modified { + modified = false; + let new_expaned = self.items + .iter() + .flat_map(|item| self.expand_once(item)) + .flatten() + .collect::>(); + for item in new_expaned { + modified |= self.items.insert(item); + } } } - fn 
expand_once(&self, item: &LR0Item<'a, T>) -> Option>> { + fn expand_once(&self, item: &LR0Item<'a, T>) -> Option>> { if let Some(nonterm@RuleElem::NonTerm(..)) = item.check_next_elem() { - let items = self.ruleset + Some(self.ruleset .find_rule(nonterm) .into_iter() - .map(|rule| LR0Item::from(rule)) - .collect(); - Some(items) + .map(|rule| LR0Item::from(rule))) } else { None } From 937bbc2a1bdd441087c75e1ec7a17731d1305dcb Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sun, 10 Nov 2024 18:25:21 +0900 Subject: [PATCH 16/48] =?UTF-8?q?[add]=20LRTable=20=E7=B3=BB=E6=A7=8B?= =?UTF-8?q?=E9=80=A0=E4=BD=93=E3=82=92=E5=AE=9F=E8=A3=85=20(=E4=B8=80?= =?UTF-8?q?=E9=83=A8=E4=BB=AE)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/automaton.rs | 7 ++ crates/parse_lr_common/src/lib.rs | 2 + crates/parse_lr_common/src/lr0.rs | 4 +- .../src/lr0/{automaton.rs => dfa.rs} | 11 +++ crates/parse_lr_common/src/table.rs | 93 +++++++++++++++++++ 5 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 crates/parse_lr_common/src/automaton.rs rename crates/parse_lr_common/src/lr0/{automaton.rs => dfa.rs} (89%) create mode 100644 crates/parse_lr_common/src/table.rs diff --git a/crates/parse_lr_common/src/automaton.rs b/crates/parse_lr_common/src/automaton.rs new file mode 100644 index 0000000..8a8ae8f --- /dev/null +++ b/crates/parse_lr_common/src/automaton.rs @@ -0,0 +1,7 @@ +use copager_cfg::token::TokenTag; +use copager_cfg::rule::RuleElem; + +pub trait Automaton<'a: 'b, 'b, T: TokenTag + 'a> { + fn len(&self) -> usize; + fn edges(&'b self) -> impl Iterator)>; +} diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs index 33181fe..2a34bc0 100644 --- a/crates/parse_lr_common/src/lib.rs +++ b/crates/parse_lr_common/src/lib.rs @@ -1,3 +1,5 @@ #![feature(gen_blocks)] +mod automaton; pub mod lr0; +pub mod table; diff --git a/crates/parse_lr_common/src/lr0.rs b/crates/parse_lr_common/src/lr0.rs index 739cd37..dbf522f 100644 --- a/crates/parse_lr_common/src/lr0.rs +++ b/crates/parse_lr_common/src/lr0.rs @@ -1,4 +1,4 @@ pub mod item; -pub mod automaton; +pub mod dfa; -pub use automaton::LR0DFA; +pub use dfa::LR0DFA; diff --git a/crates/parse_lr_common/src/lr0/automaton.rs b/crates/parse_lr_common/src/lr0/dfa.rs similarity index 89% rename from crates/parse_lr_common/src/lr0/automaton.rs rename to crates/parse_lr_common/src/lr0/dfa.rs index 9733dec..385be8e 100644 --- a/crates/parse_lr_common/src/lr0/automaton.rs +++ b/crates/parse_lr_common/src/lr0/dfa.rs @@ -4,6 +4,7 @@ use std::marker::PhantomData; use copager_cfg::token::TokenTag; use copager_cfg::rule::{Rule, RuleElem, RuleSet}; +use crate::automaton::Automaton; use crate::lr0::item::{LR0Item, LR0ItemSet}; #[derive(Debug)] @@ -54,6 +55,16 @@ impl<'a, T: TokenTag> From<&'a RuleSet> for LR0DFA<'a, T> { } } +impl<'a: 'b, 'b, T: TokenTag> Automaton<'a, 'b, T> for LR0DFA<'a, T> { + fn len(&self) -> usize { + self.nodes.len() + } + + fn edges(&'b self) -> impl Iterator)> { + self.edges.iter() + } +} + #[derive(Debug)] struct LR0DFABuilder { nodes: usize, diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs new file mode 100644 index 0000000..61e228b --- /dev/null +++ b/crates/parse_lr_common/src/table.rs @@ -0,0 +1,93 @@ +use std::collections::HashMap; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem}; + +use crate::automaton::Automaton; + +#[derive(Debug)] +pub enum LRAction { + Shift(usize), 
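+    // Dispatch contract for the driver: `Shift` pushes the target state
+    // onto the state stack, `Reduce` pops one state per RHS element and
+    // re-enters through the goto table, `Accept` finishes the parse, and
+    // `None` marks a (state, lookahead) pair with no legal move.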
+ Reduce(usize, usize, Rule), // goto_id, elems_cnt, rule + Accept, + None, +} + +#[derive(Debug)] +pub struct LRTable { // R = Rule + pub action_table: Vec>>, + pub eof_action_table: Vec>, + pub goto_table: Vec>, +} + +impl LRTable { + pub fn get_action(&self, state: usize, token: T) -> &LRAction { + self.action_table[state].get(&token).unwrap_or(&LRAction::None) + } + + pub fn get_eof_action(&self, state: usize) -> &LRAction { + &self.eof_action_table[state] + } + + pub fn get_goto(&self, state: usize, nonterm: &str) -> Option { + self.goto_table[state].get(nonterm).copied() + } +} + +#[derive(Debug)] +pub struct LRTableBuilder { + action_table: Vec>>, + eof_action_table: Vec>, + goto_table: Vec>, +} + +impl<'a: 'b, 'b, T> LRTableBuilder +where + T: TokenTag + 'a, +{ + pub fn from(automaton: &'b impl Automaton<'a, 'b, T>) -> Self { + let size = automaton.len(); + + // 初期化 + let mut action_table: Vec>> = Vec::with_capacity(size); + let mut eof_action_table: Vec> = Vec::with_capacity(size); + let mut goto_table = Vec::with_capacity(size); + for _ in 0..size { + action_table.push(HashMap::new()); + eof_action_table.push(LRAction::None); + goto_table.push(HashMap::new()); + } + + // 表の作成 + for (from, to, elem) in automaton.edges() { + match elem { + RuleElem::Term(token) => { + action_table[*from].insert(*token, LRAction::Shift(*to)); + } + RuleElem::NonTerm(name) => { + goto_table[*from].insert(name.clone(), *to); + }, + _ => {} + } + } + + LRTableBuilder { + action_table, + eof_action_table, + goto_table, + } + } + + pub fn build(self) -> LRTable { + LRTable { + action_table: self.action_table, + eof_action_table: self.eof_action_table, + goto_table: self.goto_table, + } + } +} + +#[cfg(test)] +mod test { + // TODO +} From c42592acd8c56ada69195d1dc5bd6a866082d987 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sun, 10 Nov 2024 19:06:39 +0900 Subject: [PATCH 17/48] =?UTF-8?q?[add]=20LRDriver=20=E3=82=92=E4=BD=9C?= =?UTF-8?q?=E6=88=90=20(=E4=B8=80=E9=83=A8=E4=BB=AE)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 + crates/parse_lr_common/Cargo.toml | 1 + crates/parse_lr_common/src/driver.rs | 81 ++++++++++++++++++++++++++++ crates/parse_lr_common/src/lib.rs | 3 +- crates/parse_lr_common/src/table.rs | 63 ++++++++++++++-------- 5 files changed, 125 insertions(+), 24 deletions(-) create mode 100644 crates/parse_lr_common/src/driver.rs diff --git a/Cargo.lock b/Cargo.lock index a69f64c..cd9e6f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -200,6 +200,7 @@ version = "0.2.0" dependencies = [ "anyhow", "copager_cfg", + "copager_parse", "thiserror", ] diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml index 4498411..06abb98 100644 --- a/crates/parse_lr_common/Cargo.toml +++ b/crates/parse_lr_common/Cargo.toml @@ -9,3 +9,4 @@ edition = "2024" anyhow = { workspace = true } thiserror = { workspace = true } copager_cfg = { path = "../cfg" } +copager_parse = { path = "../parse" } diff --git a/crates/parse_lr_common/src/driver.rs b/crates/parse_lr_common/src/driver.rs new file mode 100644 index 0000000..ff18248 --- /dev/null +++ b/crates/parse_lr_common/src/driver.rs @@ -0,0 +1,81 @@ +use copager_cfg::token::{TokenTag, Token}; +use copager_cfg::rule::{RuleElem, RuleTag}; +use copager_parse::ParseEvent; + +use crate::table::{LRAction, LRTable}; + +pub struct LRDriver +where + T: TokenTag, + R: RuleTag, +{ + table: LRTable, + stack: Vec, +} + +impl From> for LRDriver +where + T: TokenTag, + R: RuleTag, +{ + fn 
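+    // A fresh driver starts with only state 0 (the automaton's start
+    // state) on the stack; `reset` below restores the same configuration.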
from(table: LRTable) -> Self { + LRDriver { + table, + stack: vec![0], + } + } +} + +impl<'input, T, R> LRDriver +where + T: TokenTag, + R: RuleTag, +{ + pub fn reset(&mut self) { + self.stack = vec![0]; + } + + pub gen fn consume(&mut self, token: Option>) -> ParseEvent<'input, T, R> { + loop { + let top = self.stack[self.stack.len() - 1]; + match (self.table.get_action(top, token), token) { + (LRAction::Shift(new_state), Some(token)) => { + self.stack.push(*new_state); + yield ParseEvent::Read(token); + break; + }, + (LRAction::Reduce(tag, rule), _) => { + let lhs = lhs_as_str(&rule.lhs); + let rhs_len = rule.rhs.len(); + self.stack.truncate(self.stack.len() - rhs_len); + self.stack.push(self.table.get_goto(self.stack.len()-1, lhs).unwrap()); + yield ParseEvent::Parse { rule: *tag, len: rhs_len }; + }, + (LRAction::Accept, _) => { + return; + } + (LRAction::None, Some(_)) => { + // TODO + // yield ParseEvent::Err(ParseError::new_unexpected_token(token).into()); + yield ParseEvent::Err(anyhow::anyhow!("unexpected token").into()); + return; + } + (LRAction::None, None) => { + // TODO + // yield ParseEvent::Err(ParseError::UnexpectedEOF.into()); + yield ParseEvent::Err(anyhow::anyhow!("unexpected EOF").into()); + return; + } + _ => unreachable!(), + } + } + } +} + +fn lhs_as_str(lhs: &RuleElem) -> &str { + if let RuleElem::NonTerm(nt) = lhs { + nt.as_str() + } else { + unreachable!() + } +} diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs index 2a34bc0..dd25121 100644 --- a/crates/parse_lr_common/src/lib.rs +++ b/crates/parse_lr_common/src/lib.rs @@ -1,5 +1,6 @@ #![feature(gen_blocks)] mod automaton; -pub mod lr0; pub mod table; +pub mod driver; +pub mod lr0; diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs index 61e228b..51f609f 100644 --- a/crates/parse_lr_common/src/table.rs +++ b/crates/parse_lr_common/src/table.rs @@ -1,32 +1,44 @@ use std::collections::HashMap; -use copager_cfg::token::TokenTag; -use copager_cfg::rule::{Rule, RuleElem}; +use copager_cfg::token::{Token, TokenTag}; +use copager_cfg::rule::{Rule, RuleElem, RuleTag}; use crate::automaton::Automaton; #[derive(Debug)] -pub enum LRAction { +pub enum LRAction +where + T: TokenTag, + R: RuleTag, +{ Shift(usize), - Reduce(usize, usize, Rule), // goto_id, elems_cnt, rule + Reduce(R, Rule), // elems_cnt, rule Accept, None, } #[derive(Debug)] -pub struct LRTable { // R = Rule - pub action_table: Vec>>, - pub eof_action_table: Vec>, - pub goto_table: Vec>, +pub struct LRTable +where + T: TokenTag, + R: RuleTag, +{ + action_table: Vec>>, + eof_action_table: Vec>, + goto_table: Vec>, } -impl LRTable { - pub fn get_action(&self, state: usize, token: T) -> &LRAction { - self.action_table[state].get(&token).unwrap_or(&LRAction::None) - } - - pub fn get_eof_action(&self, state: usize) -> &LRAction { - &self.eof_action_table[state] +impl LRTable +where + T: TokenTag, + R: RuleTag, +{ + pub fn get_action(&self, state: usize, token: Option>) -> &LRAction { + if let Some(token) = token { + return &self.action_table[state].get(&token.kind).unwrap_or(&LRAction::None) + } else { + return &self.eof_action_table[state] + } } pub fn get_goto(&self, state: usize, nonterm: &str) -> Option { @@ -35,22 +47,27 @@ impl LRTable { } #[derive(Debug)] -pub struct LRTableBuilder { - action_table: Vec>>, - eof_action_table: Vec>, - goto_table: Vec>, +pub struct LRTableBuilder +where + T: TokenTag, + R: RuleTag, +{ + pub action_table: Vec>>, + pub eof_action_table: Vec>, + pub goto_table: 
Vec>, } -impl<'a: 'b, 'b, T> LRTableBuilder +impl<'a: 'b, 'b, T, R> LRTableBuilder where T: TokenTag + 'a, + R: RuleTag, { pub fn from(automaton: &'b impl Automaton<'a, 'b, T>) -> Self { let size = automaton.len(); // 初期化 - let mut action_table: Vec>> = Vec::with_capacity(size); - let mut eof_action_table: Vec> = Vec::with_capacity(size); + let mut action_table: Vec>> = Vec::with_capacity(size); + let mut eof_action_table = Vec::with_capacity(size); let mut goto_table = Vec::with_capacity(size); for _ in 0..size { action_table.push(HashMap::new()); @@ -78,7 +95,7 @@ where } } - pub fn build(self) -> LRTable { + pub fn build(self) -> LRTable { LRTable { action_table: self.action_table, eof_action_table: self.eof_action_table, From 07343597bc07c9516e93783419c0f69eea3d265a Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sun, 10 Nov 2024 20:22:33 +0900 Subject: [PATCH 18/48] =?UTF-8?q?[clean]=20=E6=9C=AA=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E5=9E=8B=E3=83=91=E3=83=A9=E3=83=A1=E3=83=BC=E3=82=BF=E3=82=92?= =?UTF-8?q?=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs index 51f609f..4470573 100644 --- a/crates/parse_lr_common/src/table.rs +++ b/crates/parse_lr_common/src/table.rs @@ -62,7 +62,7 @@ where T: TokenTag + 'a, R: RuleTag, { - pub fn from(automaton: &'b impl Automaton<'a, 'b, T>) -> Self { + pub fn from(automaton: &'b impl Automaton<'a, 'b, T>) -> Self { let size = automaton.len(); // 初期化 From 16f300001949448785ae82fe1123e4eebfe8029f Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sun, 10 Nov 2024 20:39:08 +0900 Subject: [PATCH 19/48] =?UTF-8?q?[change]=20LRDriver=20=E3=81=8C=20LRTable?= =?UTF-8?q?=20=E3=82=92=E5=8F=82=E7=85=A7=E3=81=A7=E6=8C=81=E3=81=A4?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/driver.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/parse_lr_common/src/driver.rs b/crates/parse_lr_common/src/driver.rs index ff18248..1e94f12 100644 --- a/crates/parse_lr_common/src/driver.rs +++ b/crates/parse_lr_common/src/driver.rs @@ -4,21 +4,21 @@ use copager_parse::ParseEvent; use crate::table::{LRAction, LRTable}; -pub struct LRDriver +pub struct LRDriver<'table, T, R> where T: TokenTag, R: RuleTag, { - table: LRTable, + table: &'table LRTable, stack: Vec, } -impl From> for LRDriver +impl<'table, T, R> From<&'table LRTable> for LRDriver<'table, T, R> where T: TokenTag, R: RuleTag, { - fn from(table: LRTable) -> Self { + fn from(table: &'table LRTable) -> Self { LRDriver { table, stack: vec![0], @@ -26,7 +26,7 @@ where } } -impl<'input, T, R> LRDriver +impl<'table, 'input, T, R> LRDriver<'table, T, R> where T: TokenTag, R: RuleTag, From 5378a05e465901113afaf7463b75ace37fabda09 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sun, 10 Nov 2024 20:43:01 +0900 Subject: [PATCH 20/48] [add] parse_lr_lr0 --- Cargo.lock | 12 ++++++++++ Cargo.toml | 1 + crates/parse_lr_lr0/Cargo.toml | 14 ++++++++++++ crates/parse_lr_lr0/src/lib.rs | 41 ++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+) create mode 100644 crates/parse_lr_lr0/Cargo.toml create mode 100644 crates/parse_lr_lr0/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index cd9e6f2..110546e 100644 --- a/Cargo.lock 
+++ b/Cargo.lock @@ -204,6 +204,18 @@ dependencies = [ "thiserror", ] +[[package]] +name = "copager_parse_lr_lr0" +version = "0.2.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "copager_parse", + "copager_parse_lr_common", + "thiserror", +] + [[package]] name = "copager_parse_lr_lr1" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index d690add..d098ec9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,7 @@ members = [ "./crates/parse_common", "./crates/parse_derive", "./crates/parse_lr_common", + "./crates/parse_lr_lr0", "./crates/parse_lr_lr1", "./crates/ir", "./crates/ir_void", diff --git a/crates/parse_lr_lr0/Cargo.toml b/crates/parse_lr_lr0/Cargo.toml new file mode 100644 index 0000000..5f82525 --- /dev/null +++ b/crates/parse_lr_lr0/Cargo.toml @@ -0,0 +1,14 @@ +cargo-features = ["edition2024"] + +[package] +name = "copager_parse_lr_lr0" +version = "0.2.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } +copager_parse_lr_common = { path = "../parse_lr_common" } diff --git a/crates/parse_lr_lr0/src/lib.rs b/crates/parse_lr_lr0/src/lib.rs new file mode 100644 index 0000000..bd31a9e --- /dev/null +++ b/crates/parse_lr_lr0/src/lib.rs @@ -0,0 +1,41 @@ +#![feature(gen_blocks)] + +use copager_cfg::token::{Token, TokenTag}; +use copager_cfg::rule::RuleTag; +use copager_lex::LexSource; +use copager_parse::{ParseDriver, ParseSource, ParseEvent}; +use copager_parse_lr_common::lr0::LR0DFA; +use copager_parse_lr_common::table::{LRTable, LRTableBuilder}; +use copager_parse_lr_common::driver::LRDriver; + +pub struct LR0 +where + T: TokenTag, + R: RuleTag +{ + table: LRTable, +} + +impl ParseDriver for LR0 +where + Sl: LexSource, + Sp: ParseSource, +{ + fn try_from((_, source_p): (Sl, Sp)) -> anyhow::Result { + let ruleset = source_p.into_ruleset(); + let lr0_dfa = LR0DFA::from(&ruleset); + let lr_table = LRTableBuilder::from(&lr0_dfa).build(); + + Ok(LR0 { table: lr_table }) + } + + gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> + where + Il: Iterator>, + { + let mut driver = LRDriver::from(&self.table); + for event in driver.consume(lexer.next()).collect::>() { + yield event; + } + } +} From 9c015c3fc4fce0a85258265adc5ab9d2ff17460e Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sun, 10 Nov 2024 20:44:10 +0900 Subject: [PATCH 21/48] =?UTF-8?q?[add]=20lr0=20=E3=83=95=E3=83=A9=E3=82=B0?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 + Cargo.toml | 2 ++ README.md | 1 + src/lib.rs | 2 ++ 4 files changed, 6 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 110546e..69a9a12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -34,6 +34,7 @@ dependencies = [ "copager_parse", "copager_parse_common", "copager_parse_lr_common", + "copager_parse_lr_lr0", "copager_parse_lr_lr1", "example_lang_arithmetic", "example_lang_json", diff --git a/Cargo.toml b/Cargo.toml index d098ec9..1a4d80c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ copager_lex_regex = { path = "./crates/lex_regex", optional = true } copager_parse = { path = "./crates/parse", optional = true } copager_parse_common = { path = "./crates/parse_common", optional = true } copager_parse_lr_common = { path = "./crates/parse_lr_common", optional = true } +copager_parse_lr_lr0 = { path = "./crates/parse_lr_lr0", optional 
= true } copager_parse_lr_lr1 = { path = "./crates/parse_lr_lr1", optional = true } copager_ir = { path = "./crates/ir" } copager_ir_void = { path = "./crates/ir_void", optional = true } @@ -49,6 +50,7 @@ dev = ["dep:copager_parse_common", "dep:copager_parse_lr_common"] regexlex = ["dep:copager_lex_regex"] # parse +lr0 = ["dep:copager_parse_lr_lr0"] lr1 = ["dep:copager_parse_lr_lr1"] # ir diff --git a/README.md b/README.md index bf2559e..818d305 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Rust製パーサジェネレータ ### Parse +- `lr0` : [crates/parse_lr0](crates/parse_lr0) - `lr1` : [crates/parse_lr1](crates/parse_lr1) ### IR diff --git a/src/lib.rs b/src/lib.rs index b3c112b..3bddbe7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,8 @@ pub mod lex { pub mod parse { pub use copager_parse::*; + #[cfg(feature = "lr0")] + pub use copager_parse_lr_lr0::*; #[cfg(feature = "lr1")] pub use copager_parse_lr_lr1::*; } From 7e352a62fcd195c539f0da0c73b0fcc54821d159 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Mon, 11 Nov 2024 16:53:43 +0900 Subject: [PATCH 22/48] =?UTF-8?q?[update]=20Rule=20=E6=A7=8B=E9=80=A0?= =?UTF-8?q?=E4=BD=93=E3=81=8C=20RuleTag=20=E3=82=92=E6=8C=81=E3=81=A4?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/cfg/src/rule.rs | 64 +++++++--- crates/parse/src/lib.rs | 4 +- crates/parse_common/src/rule/director.rs | 62 ++++++---- crates/parse_common/src/rule/first.rs | 52 ++++++-- crates/parse_common/src/rule/follow.rs | 52 ++++++-- crates/parse_derive/src/impl/rule.rs | 18 +-- crates/parse_lr_common/src/driver.rs | 5 +- crates/parse_lr_common/src/lr0/dfa.rs | 64 +++++++--- crates/parse_lr_common/src/lr0/item.rs | 54 ++++++--- crates/parse_lr_common/src/table.rs | 10 +- crates/parse_lr_lr1/src/builder.rs | 146 +++++++++++++++-------- 11 files changed, 371 insertions(+), 160 deletions(-) diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 9c6586d..e398285 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -8,36 +8,52 @@ pub trait RuleTag where Self: Debug + Copy + Clone + Hash + Eq, { - fn as_rules(&self) -> Vec>; + fn as_rules(&self) -> Vec>; } #[derive(Debug, Clone, Eq)] -pub struct Rule { +pub struct Rule +where + T: TokenTag, + R: RuleTag, +{ pub id: usize, + pub tag: Option, pub lhs: RuleElem, pub rhs: Vec>, } -impl From<(RuleElem, Vec>)> for Rule { - fn from((lhs, rhs): (RuleElem, Vec>)) -> Self { - Rule { id: 0, lhs, rhs } - } -} - -impl PartialEq for Rule { +impl PartialEq for Rule +where + T: TokenTag, + R: RuleTag, +{ fn eq(&self, other: &Self) -> bool { - self.lhs == other.lhs && self.rhs == other.rhs + self.tag == other.tag && self.lhs == other.lhs && self.rhs == other.rhs } } -impl Hash for Rule { +impl Hash for Rule +where + T: TokenTag, + R: RuleTag, +{ fn hash(&self, state: &mut H) { + self.tag.hash(state); self.lhs.hash(state); self.rhs.hash(state); } } -impl Rule { +impl Rule +where + T: TokenTag, + R: RuleTag, +{ + pub fn new(tag: Option, lhs: RuleElem, rhs: Vec>) -> Self { + Rule { id: 0, tag, lhs, rhs } + } + pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem> { let mut l_nonterms = vec![&self.lhs]; let r_nonterms: Vec<&RuleElem> = self @@ -88,15 +104,23 @@ impl RuleElem { } #[derive(Debug, Clone)] -pub struct RuleSet { +pub struct RuleSet +where + T: TokenTag, + R: RuleTag, +{ pub top: String, - pub rules: Vec>, + pub rules: Vec>, } -impl FromIterator> for RuleSet { +impl FromIterator> for RuleSet +where + T: 
TokenTag, + R: RuleTag, +{ fn from_iter(rules: I) -> Self where - I: IntoIterator>, + I: IntoIterator>, { let rules = rules.into_iter().collect::>(); let top = match &rules[0].lhs { @@ -107,7 +131,11 @@ impl FromIterator> for RuleSet { } } -impl RuleSet { +impl RuleSet +where + T: TokenTag, + R: RuleTag, +{ pub fn nonterms<'a>(&'a self) -> HashSet<&'a RuleElem> { self.rules.iter().flat_map(|rule| rule.nonterms()).collect() } @@ -116,7 +144,7 @@ impl RuleSet { self.rules.iter().flat_map(|rule| rule.terms()).collect() } - pub fn find_rule<'a>(&'a self, target: &RuleElem) -> Vec<&'a Rule> { + pub fn find_rule<'a>(&'a self, target: &RuleElem) -> Vec<&'a Rule> { self.rules .iter() .filter(|rule| &rule.lhs == target) diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 760f467..f259836 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -9,7 +9,7 @@ pub trait ParseSource { fn iter(&self) -> impl Iterator; - fn into_ruleset(&self) -> RuleSet { + fn into_ruleset(&self) -> RuleSet { let set_id_for_all = |(id, tag): (usize, Self::Tag)| { tag.as_rules() .into_iter() @@ -22,7 +22,7 @@ pub trait ParseSource { self.iter() .enumerate() .flat_map(set_id_for_all) - .collect::>() + .collect::>() } } diff --git a/crates/parse_common/src/rule/director.rs b/crates/parse_common/src/rule/director.rs index 4e804bb..2cdd4cf 100644 --- a/crates/parse_common/src/rule/director.rs +++ b/crates/parse_common/src/rule/director.rs @@ -1,45 +1,61 @@ use std::collections::{HashMap, HashSet}; use copager_cfg::token::TokenTag; -use copager_cfg::rule::{Rule, RuleElem, RuleSet}; +use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; use crate::rule::{FirstSet, FollowSet}; -pub struct DirectorSet<'a, T: TokenTag> { - map: HashMap<&'a Rule, Vec<&'a RuleElem>>, - _ruleset: &'a RuleSet, +pub struct DirectorSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + map: HashMap<&'a Rule, Vec<&'a RuleElem>>, } -impl<'a, T: TokenTag> From<&'a RuleSet> for DirectorSet<'a, T> { - fn from(ruleset: &'a RuleSet) -> Self { +impl<'a, T, R> From<&'a RuleSet> for DirectorSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { let build = DirectorSetBuilder::from(ruleset).calc(); let map = build.map .into_iter() .map(|(k, v)| (k, v.into_iter().collect())) .collect(); - DirectorSet { - map, - _ruleset: ruleset, - } + DirectorSet { map } } } -impl <'a, T: TokenTag> DirectorSet<'a, T> { - pub fn get(&self, rule: &Rule) -> Option<&[&'a RuleElem]> { +impl <'a, T, R> DirectorSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn get(&self, rule: &Rule) -> Option<&[&'a RuleElem]> { self.map.get(rule).map(|elems| elems.as_slice()) } } -struct DirectorSetBuilder<'a, T: TokenTag> { - map: HashMap<&'a Rule, HashSet<&'a RuleElem>>, - ruleset: &'a RuleSet, - first_set: FirstSet<'a, T>, - follow_set: FollowSet<'a, T>, +struct DirectorSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + map: HashMap<&'a Rule, HashSet<&'a RuleElem>>, + ruleset: &'a RuleSet, + first_set: FirstSet<'a, T, R>, + follow_set: FollowSet<'a, T, R>, } -impl<'a, T: TokenTag> From<&'a RuleSet> for DirectorSetBuilder<'a, T> { - fn from(ruleset: &'a RuleSet) -> Self { +impl<'a, T, R> From<&'a RuleSet> for DirectorSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { let first_set = FirstSet::from(ruleset); let follow_set = FollowSet::from(ruleset); @@ -52,7 +68,11 @@ impl<'a, T: TokenTag> From<&'a RuleSet> for DirectorSetBuilder<'a, T> { } } -impl<'a, T: 
TokenTag> DirectorSetBuilder<'a, T> { +impl<'a, T, R> DirectorSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ fn calc(mut self) -> Self { for rule in &self.ruleset.rules { self.calc_once(rule); @@ -60,7 +80,7 @@ impl<'a, T: TokenTag> DirectorSetBuilder<'a, T> { self } - fn calc_once(&mut self, rule: &'a Rule) { + fn calc_once(&mut self, rule: &'a Rule) { let lhs = match &rule.lhs { RuleElem::NonTerm(s) => s.as_str(), _ => unreachable!(), diff --git a/crates/parse_common/src/rule/first.rs b/crates/parse_common/src/rule/first.rs index c04068a..d0eb5fd 100644 --- a/crates/parse_common/src/rule/first.rs +++ b/crates/parse_common/src/rule/first.rs @@ -1,15 +1,24 @@ use std::collections::{HashMap, HashSet}; +use std::marker::PhantomData; use copager_cfg::token::TokenTag; -use copager_cfg::rule::{RuleElem, RuleSet}; +use copager_cfg::rule::{RuleElem, RuleSet, RuleTag}; -pub struct FirstSet<'a, T: TokenTag> { +pub struct FirstSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ map: HashMap>>, - _ruleset: &'a RuleSet, + _phantom: PhantomData, } -impl<'a, T: TokenTag> From<&'a RuleSet> for FirstSet<'a, T> { - fn from(ruleset: &'a RuleSet) -> Self { +impl<'a, T, R> From<&'a RuleSet> for FirstSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { let build = FirstSetBuilder::from(ruleset).expand(); let map = build.map .into_iter() @@ -18,25 +27,37 @@ impl<'a, T: TokenTag> From<&'a RuleSet> for FirstSet<'a, T> { FirstSet { map, - _ruleset: ruleset, + _phantom: PhantomData, } } } -impl<'a, T: TokenTag> FirstSet<'a, T> { +impl<'a, T, R> FirstSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ pub fn get(&self, nonterm: &str) -> Option<&[&'a RuleElem]> { self.map.get(nonterm).map(|terms| terms.as_slice()) } } -struct FirstSetBuilder<'a, T: TokenTag> { +struct FirstSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ map: HashMap>>, - ruleset: &'a RuleSet, + ruleset: &'a RuleSet, nonterms: Vec<&'a str>, } -impl<'a, T: TokenTag> From<&'a RuleSet> for FirstSetBuilder<'a, T> { - fn from(ruleset: &'a RuleSet) -> Self { +impl<'a, T, R> From<&'a RuleSet> for FirstSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { let mut map = HashMap::new(); for nonterm in ruleset.nonterms() { if let RuleElem::NonTerm(nonterm) = nonterm { @@ -61,7 +82,11 @@ impl<'a, T: TokenTag> From<&'a RuleSet> for FirstSetBuilder<'a, T> { } } -impl<'a, T: TokenTag> FirstSetBuilder<'a, T> { +impl<'a, T, R> FirstSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ fn expand(mut self) -> Self { while self.expand_child() {} self @@ -86,9 +111,10 @@ impl<'a, T: TokenTag> FirstSetBuilder<'a, T> { } } -fn rhs_first_symbol<'a, T>(ruleset: &'a RuleSet, nonterm: &str) -> impl Iterator> +fn rhs_first_symbol<'a, T, R>(ruleset: &'a RuleSet, nonterm: &str) -> impl Iterator> where T: TokenTag, + R: RuleTag, { let cmp_nonterm = |relem: &RuleElem, lhs: &str| match relem { RuleElem::NonTerm(nonterm) => nonterm == lhs, diff --git a/crates/parse_common/src/rule/follow.rs b/crates/parse_common/src/rule/follow.rs index cba7914..c4e8b4b 100644 --- a/crates/parse_common/src/rule/follow.rs +++ b/crates/parse_common/src/rule/follow.rs @@ -1,17 +1,25 @@ use std::collections::{HashMap, HashSet}; use copager_cfg::token::TokenTag; -use copager_cfg::rule::{RuleElem, RuleSet}; +use copager_cfg::rule::{RuleElem, RuleSet, RuleTag}; use crate::rule::FirstSet; -pub struct FollowSet<'a, T: TokenTag> { +pub struct FollowSet<'a, T, R> +where + T: TokenTag, + R: 
RuleTag, +{ map: HashMap>>, - _ruleset: &'a RuleSet, + _ruleset: &'a RuleSet, } -impl<'a, T: TokenTag> From<&'a RuleSet> for FollowSet<'a, T> { - fn from(ruleset: &'a RuleSet) -> Self { +impl<'a, T, R> From<&'a RuleSet> for FollowSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { let build = FollowSetBuilder::from(ruleset).expand(); let map = build.map .into_iter() @@ -25,19 +33,31 @@ impl<'a, T: TokenTag> From<&'a RuleSet> for FollowSet<'a, T> { } } -impl<'a, T: TokenTag> FollowSet<'a, T> { +impl<'a, T, R> FollowSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ pub fn get(&self, nonterm: &str) -> Option<&[&'a RuleElem]> { self.map.get(nonterm).map(|terms| terms.as_slice()) } } -pub struct FollowSetBuilder<'a, T: TokenTag> { +pub struct FollowSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ map: HashMap>>, - ruleset: &'a RuleSet, + ruleset: &'a RuleSet, } -impl<'a, T: TokenTag> From<&'a RuleSet> for FollowSetBuilder<'a, T> { - fn from(ruleset: &'a RuleSet) -> Self { +impl<'a, T, R> From<&'a RuleSet> for FollowSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { let mut map = HashMap::new(); for nonterm in ruleset.nonterms() { if let RuleElem::NonTerm(nonterm) = nonterm { @@ -53,7 +73,11 @@ impl<'a, T: TokenTag> From<&'a RuleSet> for FollowSetBuilder<'a, T> { } } -impl<'a, T: TokenTag> FollowSetBuilder<'a, T> { +impl<'a, T, R> FollowSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ fn expand(mut self) -> Self { while self.expand_child() {} self @@ -104,7 +128,11 @@ impl<'a, T: TokenTag> FollowSetBuilder<'a, T> { } } -fn first_by<'a, T: TokenTag>(first_set: &FirstSet<'a, T>, relems: &'a [RuleElem]) -> Vec<&'a RuleElem > { +fn first_by<'a, T, R>(first_set: &FirstSet<'a, T, R>, relems: &'a [RuleElem]) -> Vec<&'a RuleElem> +where + T: TokenTag, + R: RuleTag, +{ if relems.is_empty() { vec![&RuleElem::Epsilon] } else { diff --git a/crates/parse_derive/src/impl/rule.rs b/crates/parse_derive/src/impl/rule.rs index 276539e..6855b79 100644 --- a/crates/parse_derive/src/impl/rule.rs +++ b/crates/parse_derive/src/impl/rule.rs @@ -29,7 +29,7 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { quote! { impl RuleTag<#enum_assoc_type> for #enum_name { - fn as_rules(&self) -> Vec> { + fn as_rules(&self) -> Vec> { match self { #( #enum_matcher_table_i2r, )* } @@ -49,7 +49,7 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { struct VariantInfo<'a> { parent_ident: &'a Ident, self_ident: &'a Ident, - rules: Vec, + rule_lhs_rhs_tuples: Vec, } impl<'a> VariantInfo<'a> { @@ -60,18 +60,18 @@ impl<'a> VariantInfo<'a> { .parse::() .unwrap(); - let mut rules = vec![]; + let mut rule_lhs_rhs_tuples = vec![]; for attr in &variant.attrs { if attr.path().is_ident("rule") { let attr = attr.parse_args::().unwrap().value(); - rules.push(parse_rule(&token_ident, &attr)); + rule_lhs_rhs_tuples.push(parse_rule(&token_ident, &attr)); } } VariantInfo { parent_ident, self_ident, - rules, + rule_lhs_rhs_tuples, } } @@ -84,11 +84,11 @@ impl<'a> VariantInfo<'a> { fn gen_matcher_ident_to_rule(&self) -> TokenStream { let ident = self.gen_ident(); - if self.rules.is_empty() { + if self.rule_lhs_rhs_tuples.is_empty() { quote! { #ident => unimplemented!() } } else { - let rules = &self.rules; - quote! { #ident => vec![#(#rules),*] } + let lhs_rhs_tuple = &self.rule_lhs_rhs_tuples; + quote! 
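+        // Each `#[rule("...")]` attribute now expands to
+        // `Rule::new(Some(tag), lhs, rhs)`, so a reduce action can
+        // recover which enum variant produced the matched rule.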
{ #ident => vec![#(Rule::new(Some(#ident), #lhs_rhs_tuple)),*] } } } } @@ -118,5 +118,5 @@ fn parse_rule(token: &TokenStream, input: &str) -> TokenStream { rhs }; - quote! { Rule::from((#lhs, vec![ #( #rhs, )* ])) } + quote! { #lhs, vec![ #( #rhs, )* ], } } diff --git a/crates/parse_lr_common/src/driver.rs b/crates/parse_lr_common/src/driver.rs index 1e94f12..5ce056a 100644 --- a/crates/parse_lr_common/src/driver.rs +++ b/crates/parse_lr_common/src/driver.rs @@ -44,12 +44,13 @@ where yield ParseEvent::Read(token); break; }, - (LRAction::Reduce(tag, rule), _) => { + (LRAction::Reduce(rule), _) => { + let tag = rule.tag.unwrap(); let lhs = lhs_as_str(&rule.lhs); let rhs_len = rule.rhs.len(); self.stack.truncate(self.stack.len() - rhs_len); self.stack.push(self.table.get_goto(self.stack.len()-1, lhs).unwrap()); - yield ParseEvent::Parse { rule: *tag, len: rhs_len }; + yield ParseEvent::Parse { rule: tag, len: rhs_len }; }, (LRAction::Accept, _) => { return; diff --git a/crates/parse_lr_common/src/lr0/dfa.rs b/crates/parse_lr_common/src/lr0/dfa.rs index 385be8e..b8996e4 100644 --- a/crates/parse_lr_common/src/lr0/dfa.rs +++ b/crates/parse_lr_common/src/lr0/dfa.rs @@ -2,26 +2,34 @@ use std::rc::Rc; use std::marker::PhantomData; use copager_cfg::token::TokenTag; -use copager_cfg::rule::{Rule, RuleElem, RuleSet}; +use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; use crate::automaton::Automaton; use crate::lr0::item::{LR0Item, LR0ItemSet}; #[derive(Debug)] -pub struct LR0DFANode<'a, T: TokenTag> { +pub struct LR0DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ id: usize, - pub itemset: LR0ItemSet<'a, T>, + pub itemset: LR0ItemSet<'a, T, R>, pub next: Vec<(&'a RuleElem, Rc)>, // (cond, next_node) } -impl<'a, T: TokenTag> LR0DFANode<'a, T> { - pub fn contains(&self, rule: &Rule) -> bool { +impl<'a, T, R> LR0DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn contains(&self, rule: &Rule) -> bool { self.contains_by(|item| item.rule == rule) } pub fn contains_by(&self, cond: F) -> bool where - F: Fn(&LR0Item<'a, T>) -> bool + F: Fn(&LR0Item<'a, T, R>) -> bool { self.itemset .items @@ -31,13 +39,21 @@ impl<'a, T: TokenTag> LR0DFANode<'a, T> { } #[derive(Debug)] -pub struct LR0DFA<'a, T: TokenTag> { - pub nodes: Vec>>, +pub struct LR0DFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub nodes: Vec>>, pub edges: Vec<(usize, usize, &'a RuleElem)>, } -impl<'a, T: TokenTag> From<&'a RuleSet> for LR0DFA<'a, T> { - fn from(ruleset: &'a RuleSet) -> Self { +impl<'a, T, R> From<&'a RuleSet> for LR0DFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { let dfa_top = LR0DFABuilder::new().start(ruleset); let mut nodes = vec![]; @@ -55,7 +71,11 @@ impl<'a, T: TokenTag> From<&'a RuleSet> for LR0DFA<'a, T> { } } -impl<'a: 'b, 'b, T: TokenTag> Automaton<'a, 'b, T> for LR0DFA<'a, T> { +impl<'a: 'b, 'b, T, R> Automaton<'a, 'b, T> for LR0DFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ fn len(&self) -> usize { self.nodes.len() } @@ -66,20 +86,30 @@ impl<'a: 'b, 'b, T: TokenTag> Automaton<'a, 'b, T> for LR0DFA<'a, T> { } #[derive(Debug)] -struct LR0DFABuilder { +struct LR0DFABuilder +where + T: TokenTag, + R: RuleTag, +{ nodes: usize, - _phantom: PhantomData, + _phantom_t: PhantomData, + _phantom_r: PhantomData, } -impl<'a, T: TokenTag> LR0DFABuilder { +impl<'a, T, R> LR0DFABuilder +where + T: TokenTag, + R: RuleTag, +{ fn new() -> Self { LR0DFABuilder { nodes: 0, - _phantom: PhantomData, + _phantom_t: PhantomData, + _phantom_r: PhantomData, } 
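+        // Both type parameters are phantom here: the builder itself
+        // stores only a node counter, but the nodes it produces are
+        // typed over T and R.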
} - fn start(mut self, ruleset: &'a RuleSet) -> LR0DFANode<'a, T> { + fn start(mut self, ruleset: &'a RuleSet) -> LR0DFANode<'a, T, R> { let top = RuleElem::NonTerm(ruleset.top.clone()); let top = ruleset.rules .iter() @@ -90,7 +120,7 @@ impl<'a, T: TokenTag> LR0DFABuilder { self.gen_recursive(top) } - fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T>) -> LR0DFANode<'a, T> + fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T, R>) -> LR0DFANode<'a, T, R> where T: TokenTag, { diff --git a/crates/parse_lr_common/src/lr0/item.rs b/crates/parse_lr_common/src/lr0/item.rs index 2c09740..d0e8387 100644 --- a/crates/parse_lr_common/src/lr0/item.rs +++ b/crates/parse_lr_common/src/lr0/item.rs @@ -1,16 +1,24 @@ use std::collections::{HashMap, HashSet}; use copager_cfg::token::TokenTag; -use copager_cfg::rule::{Rule, RuleElem, RuleSet}; +use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub struct LR0Item<'a, T: TokenTag> { - pub rule: &'a Rule, +pub struct LR0Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub rule: &'a Rule, pub dot_pos: usize, } -impl<'a, T: TokenTag> From<&'a Rule> for LR0Item<'a, T> { - fn from(rule: &'a Rule) -> Self { +impl<'a, T, R> From<&'a Rule> for LR0Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(rule: &'a Rule) -> Self { if rule.rhs[0] == RuleElem::Epsilon { LR0Item { rule, dot_pos: 1 } } else { @@ -19,7 +27,11 @@ impl<'a, T: TokenTag> From<&'a Rule> for LR0Item<'a, T> { } } -impl<'a, T: TokenTag> LR0Item<'a, T> { +impl<'a, T, R> LR0Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ pub fn gen_next(&self) -> Self { assert!(self.dot_pos + 1 <= self.rule.rhs.len()); LR0Item { @@ -38,13 +50,21 @@ impl<'a, T: TokenTag> LR0Item<'a, T> { } #[derive(Debug)] -pub struct LR0ItemSet<'a, T: TokenTag> { - pub items: HashSet>, - ruleset: &'a RuleSet, +pub struct LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub items: HashSet>, + ruleset: &'a RuleSet, } -impl<'a, T: TokenTag> From<&'a RuleSet> for LR0ItemSet<'a, T> { - fn from(ruleset: &'a RuleSet) -> Self { +impl<'a, T, R> From<&'a RuleSet> for LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { LR0ItemSet { items: HashSet::new(), ruleset, @@ -52,13 +72,17 @@ impl<'a, T: TokenTag> From<&'a RuleSet> for LR0ItemSet<'a, T> { } } -impl<'a, T: TokenTag> LR0ItemSet<'a, T> { - pub fn init(mut self, rule: &'a Rule) -> Self { +impl<'a, T, R> LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn init(mut self, rule: &'a Rule) -> Self { self.items.insert(LR0Item::from(rule)); self } - pub fn gen_next_sets(&mut self) -> impl Iterator, LR0ItemSet<'a, T>)> { + pub fn gen_next_sets(&mut self) -> impl Iterator, LR0ItemSet<'a, T, R>)> { self.expand(); let mut next_set_candidates = HashMap::new(); @@ -94,7 +118,7 @@ impl<'a, T: TokenTag> LR0ItemSet<'a, T> { } } - fn expand_once(&self, item: &LR0Item<'a, T>) -> Option>> { + fn expand_once(&self, item: &LR0Item<'a, T, R>) -> Option>> { if let Some(nonterm@RuleElem::NonTerm(..)) = item.check_next_elem() { Some(self.ruleset .find_rule(nonterm) diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs index 4470573..a43a433 100644 --- a/crates/parse_lr_common/src/table.rs +++ b/crates/parse_lr_common/src/table.rs @@ -12,7 +12,7 @@ where R: RuleTag, { Shift(usize), - Reduce(R, Rule), // elems_cnt, rule + Reduce(Rule), Accept, None, } @@ -95,6 +95,14 @@ where } } + pub fn set_reduce(&mut self, 
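+        // `token = None` installs the reduction as the EOF action for
+        // `state`; `Some(t)` keys it by the lookahead terminal `t` instead.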
state: usize, token: Option, rule: Rule) { + if let Some(token) = token { + self.action_table[state].insert(token, LRAction::Reduce(rule)); + } else { + self.eof_action_table[state] = LRAction::Reduce(rule); + } + } + pub fn build(self) -> LRTable { LRTable { action_table: self.action_table, diff --git a/crates/parse_lr_lr1/src/builder.rs b/crates/parse_lr_lr1/src/builder.rs index 5978ed8..05b6b5e 100644 --- a/crates/parse_lr_lr1/src/builder.rs +++ b/crates/parse_lr_lr1/src/builder.rs @@ -1,11 +1,12 @@ use std::collections::{HashMap, HashSet}; +use std::marker::PhantomData; use std::hash::Hash; use itertools::Itertools; use serde::{Serialize, Deserialize}; use copager_cfg::token::TokenTag; -use copager_cfg::rule::{Rule, RuleElem, RuleSet}; +use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; use copager_lex::LexSource; use copager_parse::ParseSource; @@ -43,10 +44,11 @@ where let first_set = ruleset.first_set(); // 2. Generate dummy nonterm - let top_dummy: Rule = Rule::from(( + let top_dummy: Rule = Rule::new( + None, RuleElem::new_nonterm("__top_dummy"), vec![RuleElem::new_nonterm(&ruleset.top)], - )); + ); let top_dummy = vec![LRItem::new( &top_dummy, HashSet::from_iter(vec![&RuleElem::EOF]), @@ -69,10 +71,10 @@ where } } - let mut action_table: Vec>> = Vec::with_capacity(dfa.0.len()); - let mut eof_action_table: Vec> = Vec::with_capacity(dfa.0.len()); - let mut goto_table: Vec> = Vec::with_capacity(dfa.0.len()); - for _ in 0..dfa.0.len() { + let mut action_table: Vec>> = Vec::with_capacity(dfa.sets.len()); + let mut eof_action_table: Vec> = Vec::with_capacity(dfa.sets.len()); + let mut goto_table: Vec> = Vec::with_capacity(dfa.sets.len()); + for _ in 0..dfa.sets.len() { action_table.push(HashMap::from_iter( source_l.iter() .map(|token| (token, LRAction::None)) @@ -84,7 +86,7 @@ where // 5. 
Setup tables let rule_tags = source_p.iter().collect::>(); - for lritem_set in &dfa.0 { + for lritem_set in &dfa.sets { for (token, next) in &lritem_set.next { match &token { RuleElem::NonTerm(s) => { @@ -142,17 +144,26 @@ where } #[derive(Debug)] -struct LRItemDFA<'a, T: TokenTag> ( - Vec> -); +struct LRItemDFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + sets: Vec>, + _phantom: PhantomData, +} -impl<'a, T: TokenTag> LRItemDFA<'a, T> { +impl<'a, T, R> LRItemDFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ fn r#gen( - init_set: LRItemSet<'a, T>, - ruleset: &'a RuleSet, + init_set: LRItemSet<'a, T, R>, + ruleset: &'a RuleSet, first_set: &HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> LRItemDFA<'a, T> { - let issue_id = |old_sets: &Vec>, set: &LRItemSet<'a, T>| { + ) -> LRItemDFA<'a, T, R> { + let issue_id = |old_sets: &Vec>, set: &LRItemSet<'a, T, R>| { if let Some(ex_set) = old_sets.iter().find(|&set0| set0.strict_eq(set)) { Err(ex_set.id) } else { @@ -184,31 +195,50 @@ impl<'a, T: TokenTag> LRItemDFA<'a, T> { loop_idx = (loop_idx.1, loop_idx.1 + new_found_cnt); } - LRItemDFA(lritem_sets) + LRItemDFA { + sets: lritem_sets, + _phantom: PhantomData, + } } } #[derive(Clone, Debug, Eq)] -struct LRItemSet<'a, T: TokenTag> { +struct LRItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ id: i32, next: HashMap<&'a RuleElem, i32>, - lr_items: HashSet>, + lr_items: HashSet>, } -impl<'a, T: TokenTag> PartialEq for LRItemSet<'a, T> { - fn eq(&self, other: &LRItemSet<'a, T>) -> bool { +impl<'a, T, R> PartialEq for LRItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn eq(&self, other: &LRItemSet<'a, T, R>) -> bool { self.lr_items == other.lr_items } } -impl<'a, T: TokenTag> PartialEq>> for LRItemSet<'a, T> { - fn eq(&self, other: &HashSet>) -> bool { +impl<'a, T, R> PartialEq>> for LRItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn eq(&self, other: &HashSet>) -> bool { &self.lr_items == other } } -impl<'a, T: TokenTag> LRItemSet<'a, T> { - fn new(id: i32, lr_items: HashSet>) -> Self { +impl<'a, T, R> LRItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn new(id: i32, lr_items: HashSet>) -> Self { LRItemSet { id, next: HashMap::new(), @@ -227,21 +257,21 @@ impl<'a, T: TokenTag> LRItemSet<'a, T> { fn expand_closure<'b>( mut self, - ruleset: &'a RuleSet, + ruleset: &'a RuleSet, first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> LRItemSet<'a, T> { + ) -> LRItemSet<'a, T, R> { let mut lr_items = self.lr_items.clone(); let mut lr_items_fetched = self.lr_items; loop { - let new_items: Vec> = lr_items_fetched + let new_items: Vec> = lr_items_fetched .iter() .flat_map(|item| item.expand_closure(ruleset, first_set)) .collect(); - let new_items = LRItem::<'_, _>::unify_all(new_items); + let new_items = LRItem::<'_, _, _>::unify_all(new_items); let new_items = HashSet::from_iter(new_items); let bef_len = lr_items.len(); - lr_items = LRItem::<'_, _>::unity_set(lr_items, new_items.clone()); + lr_items = LRItem::<'_, _, _>::unity_set(lr_items, new_items.clone()); let af_len = lr_items.len(); if bef_len == af_len { break; @@ -255,16 +285,16 @@ impl<'a, T: TokenTag> LRItemSet<'a, T> { fn gen_next_sets<'b>( &self, - ruleset: &'a RuleSet, + ruleset: &'a RuleSet, first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> HashMap<&'a RuleElem, LRItemSet<'a, T>> { - let new_items: Vec<(&'a RuleElem, LRItem<'a, T>)> = self + ) -> HashMap<&'a RuleElem, LRItemSet<'a, T, R>> { + let new_items: Vec<(&'a RuleElem, LRItem<'a, T, R>)> = self .lr_items .iter() 
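            // next_dot() pairs each item with the symbol under its dot and
            // advances the dot; completed items yield None and drop out here.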
.filter_map(|lr_item| lr_item.next_dot()) .collect(); - let mut new_sets: HashMap<&RuleElem, HashSet>> = HashMap::new(); + let mut new_sets: HashMap<&RuleElem, HashSet>> = HashMap::new(); for (bef_token, lr_item) in new_items { if new_sets.get(&bef_token).is_none() { new_sets.insert(bef_token, HashSet::new()); @@ -272,7 +302,7 @@ impl<'a, T: TokenTag> LRItemSet<'a, T> { new_sets.get_mut(&bef_token).unwrap().insert(lr_item); } - let mut new_sets_expanded: HashMap<&'a RuleElem, LRItemSet<'_, _>> = HashMap::new(); + let mut new_sets_expanded: HashMap<&'a RuleElem, LRItemSet<'_, _, _>> = HashMap::new(); for (ktoken, new_set) in new_sets { let new_set = LRItemSet::new(0, new_set); let new_set = new_set.expand_closure(ruleset, first_set); @@ -284,27 +314,43 @@ impl<'a, T: TokenTag> LRItemSet<'a, T> { } #[derive(Clone, Debug, Eq)] -struct LRItem<'a, T: TokenTag> { - rule: &'a Rule, +struct LRItem<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + rule: &'a Rule, dot_pos: usize, la_tokens: HashSet<&'a RuleElem>, } -impl<'a, T: TokenTag> Hash for LRItem<'a, T> { +impl<'a, T, R> Hash for LRItem<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ fn hash(&self, state: &mut H) { self.rule.hash(state); self.dot_pos.hash(state); } } -impl<'a, T: TokenTag> PartialEq for LRItem<'a, T> { +impl<'a, T, R> PartialEq for LRItem<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ fn eq(&self, other: &Self) -> bool { self.rule == other.rule && self.dot_pos == other.dot_pos } } -impl<'a, T: TokenTag> LRItem<'a, T> { - fn new(rule: &'a Rule, la_tokens: HashSet<&'a RuleElem>) -> LRItem<'a, T> { +impl<'a, T, R> LRItem<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn new(rule: &'a Rule, la_tokens: HashSet<&'a RuleElem>) -> LRItem<'a, T, R> { LRItem { rule, dot_pos: 0, @@ -320,9 +366,9 @@ impl<'a, T: TokenTag> LRItem<'a, T> { fn expand_closure<'b>( &self, - ruleset: &'a RuleSet, + ruleset: &'a RuleSet, first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> HashSet> { + ) -> HashSet> { let af_la_tokens = if self.dot_pos + 1 < self.rule.rhs.len() { HashSet::from_iter( first_set @@ -340,7 +386,7 @@ impl<'a, T: TokenTag> LRItem<'a, T> { ruleset .find_rule(&self.rule.rhs[self.dot_pos]) .into_iter() - .map(|rule| LRItem::<'_, _>::new(rule, af_la_tokens.clone())) + .map(|rule| LRItem::<'_, _, _>::new(rule, af_la_tokens.clone())) .collect() } else { HashSet::new() @@ -348,7 +394,7 @@ impl<'a, T: TokenTag> LRItem<'a, T> { } #[allow(clippy::int_plus_one)] - fn next_dot(&self) -> Option<(&'a RuleElem, LRItem<'a, T>)> { + fn next_dot(&self) -> Option<(&'a RuleElem, LRItem<'a, T, R>)> { if self.dot_pos + 1 <= self.rule.rhs.len() { let bef_token = &self.rule.rhs[self.dot_pos]; let item = LRItem { @@ -362,7 +408,7 @@ impl<'a, T: TokenTag> LRItem<'a, T> { } } - fn unify(&mut self, other: LRItem<'a, T>) { + fn unify(&mut self, other: LRItem<'a, T, R>) { if self != &other { return; } @@ -373,7 +419,7 @@ impl<'a, T: TokenTag> LRItem<'a, T> { }); } - fn unify_all(mut items: Vec>) -> Vec> { + fn unify_all(mut items: Vec>) -> Vec> { for idx in (0..items.len()).permutations(2) { let (a_idx, b_idx) = (idx[0], idx[1]); let tmp = items[b_idx].clone(); @@ -383,9 +429,9 @@ impl<'a, T: TokenTag> LRItem<'a, T> { } fn unity_set( - items_a: HashSet>, - items_b: HashSet>, - ) -> HashSet> { + items_a: HashSet>, + items_b: HashSet>, + ) -> HashSet> { let mut items_a = Vec::from_iter(items_a); let items_b = Vec::from_iter(items_b); items_a.extend(items_b); From cd84b18429995c9d93544ff8c323bd074da14567 Mon Sep 17 00:00:00 2001 From: NakagamiYuta 
Date: Mon, 11 Nov 2024 18:24:47 +0900 Subject: [PATCH 23/48] [change] LR0DFANode::contains -> find_all --- crates/parse_lr_common/src/lr0/dfa.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/crates/parse_lr_common/src/lr0/dfa.rs b/crates/parse_lr_common/src/lr0/dfa.rs index b8996e4..5fd5260 100644 --- a/crates/parse_lr_common/src/lr0/dfa.rs +++ b/crates/parse_lr_common/src/lr0/dfa.rs @@ -13,7 +13,7 @@ where T: TokenTag, R: RuleTag, { - id: usize, + pub id: usize, pub itemset: LR0ItemSet<'a, T, R>, pub next: Vec<(&'a RuleElem, Rc)>, // (cond, next_node) } @@ -23,18 +23,19 @@ where T: TokenTag, R: RuleTag, { - pub fn contains(&self, rule: &Rule) -> bool { - self.contains_by(|item| item.rule == rule) + pub fn find_all(&self, rule: &Rule) -> impl Iterator> { + self.find_all_by(move |item| item.rule == rule) } - pub fn contains_by(&self, cond: F) -> bool + pub fn find_all_by(&self, cond: F) -> impl Iterator> where - F: Fn(&LR0Item<'a, T, R>) -> bool + F: Fn(&&LR0Item<'a, T, R>) -> bool { self.itemset .items .iter() - .any(cond) + .filter(cond) + .map(|item| item.rule) } } From 3a8b87018678edf8300f4dccc4c8ccc0f06c80a6 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Mon, 11 Nov 2024 19:04:50 +0900 Subject: [PATCH 24/48] =?UTF-8?q?[fix]=20=E3=83=8E=E3=83=BC=E3=83=89?= =?UTF-8?q?=E8=A2=AB=E3=82=8A=E3=82=92=E8=80=83=E6=85=AE=E3=81=97=E3=81=A6?= =?UTF-8?q?=20LR0DFA=20=E3=82=92=E4=BD=9C=E6=88=90=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/lr0/dfa.rs | 31 +++++++++------- crates/parse_lr_common/src/lr0/item.rs | 49 ++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/crates/parse_lr_common/src/lr0/dfa.rs b/crates/parse_lr_common/src/lr0/dfa.rs index 5fd5260..d58001b 100644 --- a/crates/parse_lr_common/src/lr0/dfa.rs +++ b/crates/parse_lr_common/src/lr0/dfa.rs @@ -1,3 +1,4 @@ +use std::collections::HashSet; use std::rc::Rc; use std::marker::PhantomData; @@ -7,7 +8,7 @@ use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; use crate::automaton::Automaton; use crate::lr0::item::{LR0Item, LR0ItemSet}; -#[derive(Debug)] +#[derive(Debug, Clone, Hash, PartialEq, Eq)] pub struct LR0DFANode<'a, T, R> where T: TokenTag, @@ -87,24 +88,24 @@ where } #[derive(Debug)] -struct LR0DFABuilder +struct LR0DFABuilder<'a, T, R> where T: TokenTag, R: RuleTag, { - nodes: usize, + itemsets: HashSet>, _phantom_t: PhantomData, _phantom_r: PhantomData, } -impl<'a, T, R> LR0DFABuilder +impl<'a, T, R> LR0DFABuilder<'a, T, R> where T: TokenTag, R: RuleTag, { fn new() -> Self { LR0DFABuilder { - nodes: 0, + itemsets: HashSet::new(), _phantom_t: PhantomData, _phantom_r: PhantomData, } @@ -118,23 +119,29 @@ where .unwrap(); let top = LR0ItemSet::from(ruleset).init(top); - self.gen_recursive(top) + self.gen_recursive(top).unwrap() } - fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T, R>) -> LR0DFANode<'a, T, R> + fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T, R>) -> Option> where T: TokenTag, { - let id = self.nodes; + if self.itemsets.contains(&itemset) { + return None; + } + + let id = self.itemsets.len(); + self.itemsets.insert(itemset.clone()); + let next = itemset .gen_next_sets() - .map(|(cond, next_items) | { - (cond, Rc::new(self.gen_recursive(next_items))) + .filter_map(|(cond, next_items) | { + let next_node = self.gen_recursive(next_items); + next_node.map(|next_node| (cond, 
From 8547b8635e5063dadc30a41ccff76e2331d1d70b Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Mon, 11 Nov 2024 19:19:08 +0900
Subject: [PATCH 25/48] [add] Add a RuleSet::update_top method
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/cfg/src/rule.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs
index e398285..f69f2e2 100644
--- a/crates/cfg/src/rule.rs
+++ b/crates/cfg/src/rule.rs
@@ -136,6 +136,13 @@ where
     T: TokenTag,
     R: RuleTag<T>,
 {
+    pub fn update_top(&mut self, rule: Rule<T, R>) {
+        if let RuleElem::NonTerm(top) = &rule.lhs {
+            self.top = top.to_string();
+        }
+        self.rules.push(rule);
+    }
+
     pub fn nonterms<'a>(&'a self) -> HashSet<&'a RuleElem<T>> {
         self.rules.iter().flat_map(|rule| rule.nonterms()).collect()
     }
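update_top is the hook the LR builders below use to augment the grammar: pushing a fresh start rule S' -> S (here named __top_dummy) and re-pointing top at it gives the automaton a single distinguished item S' -> S • on which to key Accept. From patch 31 below, the call site looks like this:

    let mut ruleset = source_p.into_ruleset();
    let top_dummy = Rule::new(
        None,
        RuleElem::new_nonterm("__top_dummy"),
        vec![RuleElem::new_nonterm(&ruleset.top)],
    );
    ruleset.update_top(top_dummy.clone());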
From bc240f1aef40189b7302a01eb9c2d24038d83e93 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Mon, 11 Nov 2024 19:37:35 +0900
Subject: [PATCH 26/48] [update] Implement Debug and Display for the dfa, item,
 and rule structs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/cfg/src/rule.rs                 | 21 ++++++++++--
 crates/parse_lr_common/src/lr0/dfa.rs  | 37 ++++++++++++++++++++-
 crates/parse_lr_common/src/lr0/item.rs | 46 ++++++++++++++++++++++++--
 3 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs
index f69f2e2..bcddb4e 100644
--- a/crates/cfg/src/rule.rs
+++ b/crates/cfg/src/rule.rs
@@ -1,5 +1,5 @@
 use std::collections::{HashMap, HashSet};
-use std::fmt::Debug;
+use std::fmt::{Display, Debug};
 use std::hash::Hash;
 
 use crate::token::TokenTag;
@@ -73,7 +73,7 @@ where
     }
 }
 
-#[derive(Debug, Clone, Hash, Eq)]
+#[derive(Clone, Hash, Eq)]
 pub enum RuleElem<T> {
     NonTerm(String),
     Term(T),
     Epsilon,
     EOF,
 }
 
+impl<T: TokenTag> Display for RuleElem<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            RuleElem::NonTerm(s) => write!(f, "<{}>", s),
+            RuleElem::Term(t) => write!(f, "{:?}", t.as_str()),
+            RuleElem::Epsilon => write!(f, "ε"),
+            RuleElem::EOF => write!(f, "$"),
+        }
+    }
+}
+
+impl<T: TokenTag> Debug for RuleElem<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self)
+    }
+}
+
 impl<T: TokenTag> PartialEq for RuleElem<T> {
     fn eq(&self, other: &Self) -> bool {
         match (self, other) {

diff --git a/crates/parse_lr_common/src/lr0/dfa.rs b/crates/parse_lr_common/src/lr0/dfa.rs
index d58001b..7a914de 100644
--- a/crates/parse_lr_common/src/lr0/dfa.rs
+++ b/crates/parse_lr_common/src/lr0/dfa.rs
@@ -1,4 +1,5 @@
 use std::collections::HashSet;
+use std::fmt::Debug;
 use std::rc::Rc;
 use std::marker::PhantomData;
 
@@ -8,7 +9,7 @@ use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
 use crate::automaton::Automaton;
 use crate::lr0::item::{LR0Item, LR0ItemSet};
 
-#[derive(Debug, Clone, Hash, PartialEq, Eq)]
+#[derive(Clone, Hash, PartialEq, Eq)]
 pub struct LR0DFANode<'a, T, R>
 where
     T: TokenTag,
     R: RuleTag<T>,
@@ -19,6 +20,39 @@ where
     pub next: Vec<(&'a RuleElem<T>, Rc<Self>)>, // (cond, next_node)
 }
 
+impl<'a, T, R> Debug for LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        #[derive(Debug)]
+        #[allow(dead_code)]
+        struct LR0DFANode<'a, 'b, T, R>
+        where
+            T: TokenTag,
+            R: RuleTag<T>,
+        {
+            id: usize,
+            itemset: &'b LR0ItemSet<'a, T, R>,
+            next: Vec<(&'a RuleElem<T>, usize)>,
+        }
+
+        let id = self.id;
+        let itemset = &self.itemset;
+        let next = self.next
+            .iter()
+            .map(|(cond, next_node)| (*cond, next_node.id))
+            .collect::<Vec<_>>();
+
+        if f.alternate() {
+            return write!(f, "{:#?}", LR0DFANode { id, itemset, next });
+        } else {
+            write!(f, "{:?}", LR0DFANode { id, itemset, next })
+        }
+    }
+}
+
 impl<'a, T, R> LR0DFANode<'a, T, R>
@@ -62,6 +96,7 @@ where
         let mut edges = vec![];
         let mut stack = vec![Rc::new(dfa_top)];
         while let Some(node) = stack.pop() {
+            println!("{:#?}", node);
             nodes.push(Rc::clone(&node));
             for (cond, next_node) in &node.next {
                 edges.push((node.id, next_node.id, *cond));

diff --git a/crates/parse_lr_common/src/lr0/item.rs b/crates/parse_lr_common/src/lr0/item.rs
index cdb09c8..1c70a01 100644
--- a/crates/parse_lr_common/src/lr0/item.rs
+++ b/crates/parse_lr_common/src/lr0/item.rs
@@ -1,10 +1,11 @@
 use std::collections::{HashMap, HashSet};
+use std::fmt::{Display, Debug};
 use std::hash::Hash;
 
 use copager_cfg::token::TokenTag;
 use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
 
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+#[derive(Clone, Copy, Hash, PartialEq, Eq)]
 pub struct LR0Item<'a, T, R>
 where
     T: TokenTag,
     R: RuleTag<T>,
@@ -14,6 +15,33 @@ where
     pub dot_pos: usize,
 }
 
+impl<'a, T, R> Display for LR0Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} -> ", self.rule.lhs)?;
+        for (i, elem) in self.rule.rhs.iter().enumerate() {
+            if i == self.dot_pos {
+                write!(f, "• ")?;
+            }
+            write!(f, "{} ", elem)?;
+        }
+        write!(f, "")
+    }
+}
+
+impl<'a, T, R> Debug for LR0Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self)
+    }
+}
+
 impl<'a, T, R> From<&'a Rule<T, R>> for LR0Item<'a, T, R>
 where
     T: TokenTag,
     R: RuleTag<T>,
@@ -50,7 +78,7 @@ where
     }
 }
 
-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub struct LR0ItemSet<'a, T, R>
 where
     T: TokenTag,
     R: RuleTag<T>,
@@ -60,6 +88,20 @@ where
     pub items: Vec<LR0Item<'a, T, R>>,
     ruleset: &'a RuleSet<T, R>,
 }
 
+impl<'a, T, R> Debug for LR0ItemSet<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if f.alternate() {
+            write!(f, "{:#?}", self.items)
+        } else {
+            write!(f, "{:?}", self.items)
+        }
+    }
+}
+
 impl<'a, T, R> From<&'a RuleSet<T, R>> for LR0ItemSet<'a, T, R>
 where
     T: TokenTag,

From bbc6bf2460ddee7357ee39e2523727b8ff8a4fa5 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Mon, 11 Nov 2024 20:12:54 +0900
Subject: [PATCH 27/48] [fix] Fix a bug where the trailing dot was not shown in
 the Debug output of LR0Item
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/parse_lr_common/src/lr0/item.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/crates/parse_lr_common/src/lr0/item.rs b/crates/parse_lr_common/src/lr0/item.rs
index 1c70a01..ebe2489 100644
--- a/crates/parse_lr_common/src/lr0/item.rs
+++ b/crates/parse_lr_common/src/lr0/item.rs
@@ -28,6 +28,9 @@ where
             }
             write!(f, "{} ", elem)?;
         }
+        if self.dot_pos == self.rule.rhs.len() {
+            write!(f, "•")?;
+        }
         write!(f, "")
     }
 }

From 16f427dbf62b96ce97b4408f4258575ced2a126c Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Mon, 11 Nov 2024 21:42:42 +0900
Subject: [PATCH 28/48] [update] LRTable::set_reduce -> set

---
 crates/parse_lr_common/src/table.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs
index a43a433..0b4f857 100644
--- a/crates/parse_lr_common/src/table.rs
+++ b/crates/parse_lr_common/src/table.rs
@@ -5,7 +5,7 @@ use copager_cfg::rule::{Rule, RuleElem, RuleTag};
 
 use crate::automaton::Automaton;
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub enum LRAction<T, R>
 where
     T: TokenTag,
     R: RuleTag<T>,
 {
@@ -17,7 +17,7 @@ where
     None,
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct LRTable<T, R>
 where
     T: TokenTag,
     R: RuleTag<T>,
 {
@@ -95,11 +95,11 @@ where
         }
     }
 
-    pub fn set_reduce(&mut self, state: usize, token: Option<T>, rule: Rule<T, R>) {
+    pub fn set(&mut self, state: usize, token: Option<T>, action: LRAction<T, R>) {
         if let Some(token) = token {
-            self.action_table[state].insert(token, LRAction::Reduce(rule));
+            self.action_table[state].insert(token, action);
         } else {
-            self.eof_action_table[state] = LRAction::Reduce(rule);
+            self.eof_action_table[state] = action;
         }
     }
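With set taking the whole LRAction as a value, one setter covers Reduce, Accept (and, in principle, Shift) uniformly, and token: None addresses the dedicated EOF column. This is exactly how patch 31 below drives it; schematically, with builder, rule and the token source assumed in scope:

    // Mark state `s` as: reduce by `rule` on every terminal, and on EOF.
    builder.set(s, None, LRAction::Reduce(rule.clone()));
    for token in source_l.iter() {
        builder.set(s, Some(token), LRAction::Reduce(rule.clone()));
    }
    // An accepting state instead gets:
    // builder.set(s, None, LRAction::Accept);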
From 1508a3ba63cb42e4b184a0481ada268cf4995696 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Mon, 11 Nov 2024 21:52:18 +0900
Subject: [PATCH 29/48] [fix] Fix a mistake in the LR0DFA generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/parse_lr_common/src/lr0/dfa.rs | 94 +++++++++++++++++++--------
 1 file changed, 66 insertions(+), 28 deletions(-)

diff --git a/crates/parse_lr_common/src/lr0/dfa.rs b/crates/parse_lr_common/src/lr0/dfa.rs
index 7a914de..1c0b945 100644
--- a/crates/parse_lr_common/src/lr0/dfa.rs
+++ b/crates/parse_lr_common/src/lr0/dfa.rs
@@ -1,6 +1,8 @@
-use std::collections::HashSet;
+use std::collections::{HashMap, BTreeMap};
 use std::fmt::Debug;
+use std::hash::Hash;
 use std::rc::Rc;
+use std::sync::RwLock;
 use std::marker::PhantomData;
 
 use copager_cfg::token::TokenTag;
@@ -9,7 +11,7 @@ use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
 use crate::automaton::Automaton;
 use crate::lr0::item::{LR0Item, LR0ItemSet};
 
-#[derive(Clone, Hash, PartialEq, Eq)]
+#[derive(Clone)]
 pub struct LR0DFANode<'a, T, R>
 where
     T: TokenTag,
     R: RuleTag<T>,
 {
     pub id: usize,
     pub itemset: LR0ItemSet<'a, T, R>,
-    pub next: Vec<(&'a RuleElem<T>, Rc<Self>)>, // (cond, next_node)
+    pub next: Vec<(&'a RuleElem<T>, Rc<RwLock<Self>>)>, // (cond, next_node)
 }
 
 impl<'a, T, R> Debug for LR0DFANode<'a, T, R>
@@ -42,7 +44,7 @@ where
         let itemset = &self.itemset;
         let next = self.next
             .iter()
-            .map(|(cond, next_node)| (*cond, next_node.id))
+            .map(|(cond, next_node)| (*cond, next_node.read().unwrap().id))
             .collect::<Vec<_>>();
 
         if f.alternate() {
@@ -53,6 +55,33 @@ where
     }
 }
 
+impl<'a, T, R> Hash for LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.id.hash(state);
+        self.itemset.hash(state);
+    }
+}
+
+impl<'a, T, R> PartialEq for LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn eq(&self, other: &Self) -> bool {
+        self.id == other.id && self.itemset == other.itemset
+    }
+}
+
+impl<'a, T, R> Eq for LR0DFANode<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{}
+
 impl<'a, T, R> LR0DFANode<'a, T, R>
 where
     T: TokenTag,
     R: RuleTag<T>,
@@ -80,7 +109,7 @@ where
     T: TokenTag,
     R: RuleTag<T>,
 {
-    pub nodes: Vec<Rc<LR0DFANode<'a, T, R>>>,
+    pub nodes: Vec<Rc<RwLock<LR0DFANode<'a, T, R>>>>,
     pub edges: Vec<(usize, usize, &'a RuleElem<T>)>,
 }
 
@@ -92,18 +121,27 @@ where
     fn from(ruleset: &'a RuleSet<T, R>) -> Self {
         let dfa_top = LR0DFABuilder::new().start(ruleset);
 
-        let mut nodes = vec![];
+        let mut nodes = BTreeMap::new();
         let mut edges = vec![];
-        let mut stack = vec![Rc::new(dfa_top)];
+        let mut stack = vec![dfa_top];
         while let Some(node) = stack.pop() {
-            println!("{:#?}", node);
-            nodes.push(Rc::clone(&node));
-            for (cond, next_node) in &node.next {
-                edges.push((node.id, next_node.id, *cond));
+            let from = node.read().unwrap().id;
+            if nodes.contains_key(&from) {
+                continue;
+            }
+            for (cond, next_node) in &node.read().unwrap().next {
+                let to = next_node.read().unwrap().id;
+                edges.push((from, to, *cond));
                 stack.push(Rc::clone(next_node));
             }
+            nodes.insert(from, Rc::clone(&node));
         }
 
+        let nodes = nodes
+            .into_iter()
+            .map(|(_, node)| node)
+            .collect();
+
         LR0DFA { nodes, edges }
     }
 }
@@ -128,7 +166,7 @@ where
     T: TokenTag,
     R: RuleTag<T>,
 {
-    itemsets: HashSet<LR0ItemSet<'a, T, R>>,
+    itemsets: HashMap<LR0ItemSet<'a, T, R>, Rc<RwLock<LR0DFANode<'a, T, R>>>>,
     _phantom_t: PhantomData<T>,
     _phantom_r: PhantomData<R>,
 }
@@ -140,13 +178,13 @@ where
 {
     fn new() -> Self {
         LR0DFABuilder {
-            itemsets: HashSet::new(),
+            itemsets: HashMap::new(),
             _phantom_t: PhantomData,
             _phantom_r: PhantomData,
         }
     }
 
-    fn start(mut self, ruleset: &'a RuleSet<T, R>) -> LR0DFANode<'a, T, R> {
+    fn start(mut self, ruleset: &'a RuleSet<T, R>) -> Rc<RwLock<LR0DFANode<'a, T, R>>> {
         let top = RuleElem::NonTerm(ruleset.top.clone());
         let top = ruleset.rules
             .iter()
             .find(|rule| rule.lhs == top)
             .unwrap();
         let top = LR0ItemSet::from(ruleset).init(top);
 
-        self.gen_recursive(top).unwrap()
+        self.gen_recursive(top)
     }
 
-    fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T, R>) -> Option<LR0DFANode<'a, T, R>>
+    fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T, R>) -> Rc<RwLock<LR0DFANode<'a, T, R>>>
     where
         T: TokenTag,
     {
-        if self.itemsets.contains(&itemset) {
-            return None;
+        if let Some(node) = self.itemsets.get(&itemset) {
+            return Rc::clone(node);
         }
 
         let id = self.itemsets.len();
-        self.itemsets.insert(itemset.clone());
-
-        let next = itemset
-            .gen_next_sets()
-            .filter_map(|(cond, next_items) | {
-                let next_node = self.gen_recursive(next_items);
-                next_node.map(|next_node| (cond, Rc::new(next_node)))
-            })
-            .collect();
+        let node = LR0DFANode { id, itemset: itemset.clone(), next: vec![] };
+        let node = Rc::new(RwLock::new(node));
+        self.itemsets.insert(itemset.clone(), Rc::clone(&node));
+
+        let mut next = vec![];
+        for (cond, nextset) in itemset.gen_next_sets() {
+            next.push((cond, self.gen_recursive(nextset)));
+        }
+        node.write().unwrap().next = next;
 
-        Some(LR0DFANode { id, itemset, next })
+        Rc::clone(&node)
     }
 }
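Note on patch 29: an LR automaton is a graph with cycles (left-recursive rules point a state back at itself or an ancestor), so the tree-shaped recursion of patch 24 either lost those back edges or could not represent them at all. The fix interns every itemset in a HashMap and publishes its node before generating the successors; a recursive call that reaches the same itemset again receives the shared, still-incomplete node, whose next edges are filled in afterwards through the RwLock. The shape of the pattern, reduced to a toy successor function (all names hypothetical):

    use std::{collections::HashMap, rc::Rc, sync::RwLock};

    #[derive(Default)]
    struct Node { next: Vec<Rc<RwLock<Node>>> }

    fn build(memo: &mut HashMap<u32, Rc<RwLock<Node>>>, key: u32) -> Rc<RwLock<Node>> {
        if let Some(node) = memo.get(&key) {
            return Rc::clone(node); // cycle: reuse the (maybe incomplete) node
        }
        let node = Rc::new(RwLock::new(Node::default()));
        memo.insert(key, Rc::clone(&node)); // publish *before* recursing
        let succs = [(key + 1) % 3]; // toy transition: 0 -> 1 -> 2 -> 0
        let next = succs.iter().map(|&k| build(memo, k)).collect();
        node.write().unwrap().next = next;
        node
    }

    fn main() {
        let mut memo = HashMap::new();
        build(&mut memo, 0); // terminates despite the 2 -> 0 back edge
    }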
From 45eafabc0177568734890112d3c40b64b1ce1247 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Mon, 11 Nov 2024 21:58:28 +0900
Subject: [PATCH 30/48] [add] Add an LRDriver::accepted method
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/parse_lr_common/src/driver.rs | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/crates/parse_lr_common/src/driver.rs b/crates/parse_lr_common/src/driver.rs
index 5ce056a..56accb0 100644
--- a/crates/parse_lr_common/src/driver.rs
+++ b/crates/parse_lr_common/src/driver.rs
@@ -11,6 +11,7 @@ where
 {
     table: &'table LRTable<T, R>,
     stack: Vec<usize>,
+    accepted: bool,
 }
 
 impl<'table, T, R> From<&'table LRTable<T, R>> for LRDriver<'table, T, R>
@@ -22,6 +23,7 @@ where
         LRDriver {
             table,
             stack: vec![0],
+            accepted: false,
         }
     }
 }
@@ -38,7 +40,8 @@ where
     pub gen fn consume(&mut self, token: Option<Token<'input, T>>) -> ParseEvent<'input, T, R> {
         loop {
             let top = self.stack[self.stack.len() - 1];
-            match (self.table.get_action(top, token), token) {
+            let action = self.table.get_action(top, token);
+            match (action, token) {
                 (LRAction::Shift(new_state), Some(token)) => {
                     self.stack.push(*new_state);
                     yield ParseEvent::Read(token);
@@ -49,16 +52,17 @@ where
                     let lhs = lhs_as_str(&rule.lhs);
                     let rhs_len = rule.rhs.len();
                     self.stack.truncate(self.stack.len() - rhs_len);
-                    self.stack.push(self.table.get_goto(self.stack.len()-1, lhs).unwrap());
+                    self.stack.push(self.table.get_goto(self.stack[self.stack.len()-1], lhs).unwrap());
                     yield ParseEvent::Parse { rule: tag, len: rhs_len };
                 },
                 (LRAction::Accept, _) => {
+                    self.accepted = true;
                     return;
                 }
-                (LRAction::None, Some(_)) => {
+                (LRAction::None, Some(token)) => {
                     // TODO
                     // yield ParseEvent::Err(ParseError::new_unexpected_token(token).into());
-                    yield ParseEvent::Err(anyhow::anyhow!("unexpected token").into());
+                    yield ParseEvent::Err(anyhow::anyhow!("unexpected token {}", token.as_str()).into());
                     return;
                 }
                 (LRAction::None, None) => {
@@ -71,6 +75,10 @@ where
             }
         }
     }
+
+    pub fn accepted(&self) -> bool {
+        self.accepted
+    }
 }
 
 fn lhs_as_str<T: TokenTag>(lhs: &RuleElem<T>) -> &str {

From 7a814354e34802285f35c8c8b906ea223ea3c6e0 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Mon, 11 Nov 2024 22:02:23 +0900
Subject: [PATCH 31/48] [add] Implement parse_lr_lr0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock                          |  3 +
 crates/parse_lr_lr0/Cargo.toml      |  7 +++
 crates/parse_lr_lr0/src/lib.rs      | 85 +++++++++++++++++++++++----
 crates/parse_lr_lr0/tests/simple.rs | 89 +++++++++++++++++++++++++++++
 4 files changed, 174 insertions(+), 10 deletions(-)
 create mode 100644 crates/parse_lr_lr0/tests/simple.rs

diff --git a/Cargo.lock b/Cargo.lock
index 69a9a12..96bce13 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -211,7 +211,10 @@ version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
+ "copager_core",
+ "copager_ir_void",
  "copager_lex",
+ "copager_lex_regex",
  "copager_parse",
  "copager_parse_lr_common",
  "thiserror",
 ]

diff --git a/crates/parse_lr_lr0/Cargo.toml b/crates/parse_lr_lr0/Cargo.toml
index 5f82525..a422068 100644
--- a/crates/parse_lr_lr0/Cargo.toml
+++ b/crates/parse_lr_lr0/Cargo.toml
@@ -12,3 +12,10 @@ copager_cfg = { path = "../cfg" }
 copager_lex = { path = "../lex" }
 copager_parse = { path = "../parse" }
 copager_parse_lr_common = { path = "../parse_lr_common" }
+
+[dev-dependencies]
+copager_core = { path = "../core" }
+copager_lex = { path = "../lex", features = ["derive"] }
+copager_lex_regex = { path = "../lex_regex" }
+copager_parse = { path = "../parse", features = ["derive"] }
+copager_ir_void = { path = "../ir_void" }

diff --git a/crates/parse_lr_lr0/src/lib.rs b/crates/parse_lr_lr0/src/lib.rs
index bd31a9e..e178e51 100644
--- a/crates/parse_lr_lr0/src/lib.rs
+++ b/crates/parse_lr_lr0/src/lib.rs
@@ -1,11 +1,14 @@
 #![feature(gen_blocks)]
 
+use std::marker::PhantomData;
+
 use copager_cfg::token::{Token, TokenTag};
-use copager_cfg::rule::RuleTag;
+use copager_cfg::rule::{Rule, RuleElem, RuleTag};
 use copager_lex::LexSource;
 use copager_parse::{ParseDriver, ParseSource, ParseEvent};
+use copager_parse_lr_common::lr0::item::LR0Item;
 use copager_parse_lr_common::lr0::LR0DFA;
-use copager_parse_lr_common::table::{LRTable, LRTableBuilder};
+use copager_parse_lr_common::table::{LRAction, LRTable, LRTableBuilder};
 use copager_parse_lr_common::driver::LRDriver;
 
 pub struct LR0<T, R>
 where
     T: TokenTag,
     R: RuleTag<T>
 {
     table: LRTable<T, R>,
 }
 
 impl<Sl, Sp> ParseDriver<Sl, Sp> for LR0<Sl::Tag, Sp::Tag>
 where
@@ -21,12 +24,9 @@ where
     Sl: LexSource,
     Sp: ParseSource<Sl::Tag>,
 {
-    fn try_from((_, source_p): (Sl, Sp)) -> anyhow::Result<Self> {
-        let ruleset = source_p.into_ruleset();
-        let lr0_dfa = LR0DFA::from(&ruleset);
-        let lr_table = LRTableBuilder::from(&lr0_dfa).build();
-
-        Ok(LR0 { table: lr_table })
+    fn try_from((source_l, source_p): (Sl, Sp)) -> anyhow::Result<Self> {
+        let table = LR0Table::try_from(source_l, source_p)?;
+        Ok(LR0 { table })
     }
 
     gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag>
     where
         Il: Iterator<Item = Token<'input, Sl::Tag>>,
     {
         let mut driver = LRDriver::from(&self.table);
-        for event in driver.consume(lexer.next()).collect::<Vec<_>>() {
-            yield event;
+        while !driver.accepted() {
+            for event in driver.consume(lexer.next()).collect::<Vec<_>>() {
+                yield event;
+            }
         }
     }
 }
+
+pub struct LR0Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>
+{
+    _phantom_t: PhantomData<T>,
+    _phantom_r: PhantomData<R>,
+}
+
+impl<T, R> LR0Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn try_from<Sl, Sp>(source_l: Sl, source_p: Sp) -> anyhow::Result<LRTable<T, R>>
+    where
+        Sl: LexSource<Tag = T>,
+        Sp: ParseSource<T, Tag = R>,
+    {
+        // Add the top-level dummy rule and update the RuleSet
+        let mut ruleset = source_p.into_ruleset();
+        let top_dummy = Rule::new(
+            None,
+            RuleElem::new_nonterm("__top_dummy"),
+            vec![RuleElem::new_nonterm(&ruleset.top)],
+        );
+        ruleset.update_top(top_dummy.clone());
+
+        // Build the LR(0) automaton
+        let dfa = LR0DFA::from(&ruleset);
+
+        // Build the LR(0) parse table
+        let mut builder = LRTableBuilder::from(&dfa);
+        for node in dfa.nodes {
+            let node = node.read().unwrap();
+            if let Some(rule) = node.find_all_by(is_lr0_reduce_state).next() {
+                // Mark Accept for the node containing "S -> Top ."
+                if let Some(_) = node.find_all(&top_dummy).next() {
+                    builder.set(node.id, None, LRAction::Accept);
+                    continue;
+                }
+
+                // Mark Reduce for nodes containing "A -> α β ."
+                builder.set(node.id, None, LRAction::Reduce(rule.clone()));
+                for token in source_l.iter() {
+                    builder.set(node.id, Some(token), LRAction::Reduce(rule.clone()));
+                }
+            }
+        }
+        let table = builder.build();
+
+        Ok(table)
+    }
+}
+
+fn is_lr0_reduce_state<T, R>(item: &&LR0Item<T, R>) -> bool
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    item.check_next_elem().is_none()
+}

diff --git a/crates/parse_lr_lr0/tests/simple.rs b/crates/parse_lr_lr0/tests/simple.rs
new file mode 100644
index 0000000..dc1e410
--- /dev/null
+++ b/crates/parse_lr_lr0/tests/simple.rs
@@ -0,0 +1,89 @@
+use copager_core::{Grammar, Processor};
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleTag, Rule, RuleElem};
+use copager_lex::LexSource;
+use copager_lex_regex::RegexLexer;
+use copager_parse::ParseSource;
+use copager_parse_lr_lr0::LR0;
+use copager_ir_void::Void;
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+enum TestToken {
+    #[default]
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\(")]
+    BracketL,
+    #[token(text = r"\)")]
+    BracketR,
+    #[token(text = r"[1-9][0-9]*")]
+    Num,
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+enum TestRule {
+    #[default]
+    #[rule("<expr> ::= <expr> Plus <num>")]
+    #[rule("<expr> ::= <expr> Minus <num>")]
+    #[rule("<expr> ::= <num>")]
+    Expr,
+    #[rule("<num> ::= BracketL <expr> BracketR")]
+    #[rule("<num> ::= Num")]
+    Num,
+}
+
+type TestGrammar = Grammar<TestToken, TestRule>;
+type TestLexer = RegexLexer<TestToken>;
+type TestParser = LR0<TestToken, TestRule>;
+type TestProcessor = Processor<TestGrammar, TestLexer, TestParser>;
+
+#[test]
+fn simple_success() {
+    const OK_INPUTS: [&str; 8] = [
+        "10",
+        "10 + 20",
+        "10 - 20",
+        "10 + 20 + 30",
+        "(10)",
+        "((((10))))",
+        "10 + (20 - 30)",
+        "(10 + 20) - 30",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &OK_INPUTS {
+        assert!(processor.process::<Void>(input).is_ok(), "input: {}", input);
+    }
+}
+
+#[test]
+fn simple_failure() {
+    const ERR_INPUTS: [&str; 7] = [
+        "()",
+        "(10 -",
+        "10 +",
+        "+",
+        "10 20 + 30",
+        "10 + 20 - 30 (",
+        "(((10))",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &ERR_INPUTS {
+        assert!(processor.process::<Void>(input).is_err(), "input: {}", input);
+    }
+}
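Between the LR(0) and SLR(1) crates it is worth spelling out the limitation this transition addresses. The LR(0) table above marks Reduce for every terminal once a reduce item is present, purely from the state; that works for the single-precedence-level plus/minus grammar in the test, but not for a grammar with two levels. There, a state can contain both a completed item such as <expr> ::= <term> • and a shift item such as <term> ::= <term> • Mul <num>, and choosing between them needs one token of lookahead. SLR(1), added next, makes exactly that refinement: reduce A -> α only on terminals in Follow(A), which excludes Mul in this situation. (Nonterminal names here follow the test grammars of this series.)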
From 5c504c43fac11f5294f768d9695c784cf99f3e65 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Tue, 12 Nov 2024 08:34:27 +0900
Subject: [PATCH 32/48] [add] Create parse_lr_slr1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock                           |  17 ++++
 Cargo.toml                           |   5 +-
 crates/parse_lr_slr1/Cargo.toml      |  22 +++++
 crates/parse_lr_slr1/src/lib.rs      | 125 +++++++++++++++++++++++++++
 crates/parse_lr_slr1/tests/simple.rs | 100 +++++++++++++++++++++
 5 files changed, 268 insertions(+), 1 deletion(-)
 create mode 100644 crates/parse_lr_slr1/Cargo.toml
 create mode 100644 crates/parse_lr_slr1/src/lib.rs
 create mode 100644 crates/parse_lr_slr1/tests/simple.rs

diff --git a/Cargo.lock b/Cargo.lock
index 96bce13..55c1fdd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -36,6 +36,7 @@ dependencies = [
  "copager_parse_lr_common",
  "copager_parse_lr_lr0",
  "copager_parse_lr_lr1",
+ "copager_parse_lr_slr1",
  "example_lang_arithmetic",
  "example_lang_json",
  "example_lang_pl0",
@@ -237,6 +238,22 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "copager_parse_lr_slr1"
+version = "0.2.0"
+dependencies = [
+ "anyhow",
+ "copager_cfg",
+ "copager_core",
+ "copager_ir_void",
+ "copager_lex",
+ "copager_lex_regex",
+ "copager_parse",
+ "copager_parse_common",
+ "copager_parse_lr_common",
+ "thiserror",
+]
+
 [[package]]
 name = "copager_utils"
 version = "0.1.1"

diff --git a/Cargo.toml b/Cargo.toml
index 1a4d80c..16a0d11 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,7 @@ copager_parse_common = { path = "./crates/parse_common", optional = true }
 copager_parse_lr_common = { path = "./crates/parse_lr_common", optional = true }
 copager_parse_lr_lr0 = { path = "./crates/parse_lr_lr0", optional = true }
 copager_parse_lr_lr1 = { path = "./crates/parse_lr_lr1", optional = true }
+copager_parse_lr_slr1 = { path = "./crates/parse_lr_slr1", optional = true }
 copager_ir = { path = "./crates/ir" }
 copager_ir_void = { path = "./crates/ir_void", optional = true }
 copager_ir_sexp = { path = "./crates/ir_sexp", optional = true }
@@ -36,7 +37,7 @@ example_lang_xml = { path = "./examples/lang_xml" }
 all = [
     "prebuild", "derive", "dev", # common
     "regexlex",                  # lex
-    "lr1",                       # parse
+    "lr0", "lr1", "slr1",        # parse
     "void", "sexp"               # ir
 ]
@@ -52,6 +53,7 @@ regexlex = ["dep:copager_lex_regex"]
 # parse
 lr0 = ["dep:copager_parse_lr_lr0"]
 lr1 = ["dep:copager_parse_lr_lr1"]
+slr1 = ["dep:copager_parse_lr_slr1"]
 
 # ir
 void = ["dep:copager_ir_void"]
@@ -73,6 +75,7 @@ members = [
     "./crates/parse_lr_common",
     "./crates/parse_lr_lr0",
     "./crates/parse_lr_lr1",
+    "./crates/parse_lr_slr1",
     "./crates/ir",
     "./crates/ir_void",
     "./crates/ir_sexp",

diff --git a/crates/parse_lr_slr1/Cargo.toml b/crates/parse_lr_slr1/Cargo.toml
new file mode 100644
index 0000000..c402c4f
--- /dev/null
+++ b/crates/parse_lr_slr1/Cargo.toml
@@ -0,0 +1,22 @@
+cargo-features = ["edition2024"]
+
+[package]
+name = "copager_parse_lr_slr1"
+version = "0.2.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+copager_cfg = { path = "../cfg" }
+copager_lex = { path = "../lex" }
+copager_parse = { path = "../parse" }
+copager_parse_common = { path = "../parse_common" }
+copager_parse_lr_common = { path = "../parse_lr_common" }
+
+[dev-dependencies]
+copager_core = { path = "../core" }
+copager_lex = { path = "../lex", features = ["derive"] }
+copager_lex_regex = { path = "../lex_regex" }
+copager_parse = { path = "../parse", features = ["derive"] }
+copager_ir_void = { path = "../ir_void" }

diff --git a/crates/parse_lr_slr1/src/lib.rs b/crates/parse_lr_slr1/src/lib.rs
new file mode 100644
index 0000000..8d7493f
--- /dev/null
+++ b/crates/parse_lr_slr1/src/lib.rs
@@ -0,0 +1,125 @@
+#![feature(gen_blocks)]
+
+use std::marker::PhantomData;
+
+use copager_cfg::token::{Token, TokenTag};
+use copager_cfg::rule::{Rule, RuleElem, RuleTag};
+use copager_lex::LexSource;
+use copager_parse::{ParseDriver, ParseSource, ParseEvent};
+use copager_parse_common::rule::FollowSet;
+use copager_parse_lr_common::lr0::item::LR0Item;
+use copager_parse_lr_common::lr0::LR0DFA;
+use copager_parse_lr_common::table::{LRAction, LRTable, LRTableBuilder};
+use copager_parse_lr_common::driver::LRDriver;
+
+pub struct SLR1<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>
+{
+    table: LRTable<T, R>,
+}
+
+impl<Sl, Sp> ParseDriver<Sl, Sp> for SLR1<Sl::Tag, Sp::Tag>
+where
+    Sl: LexSource,
+    Sp: ParseSource<Sl::Tag>,
+{
+    fn try_from((_, source_p): (Sl, Sp)) -> anyhow::Result<Self> {
+        let table = SLR1Table::try_from(source_p)?;
+        Ok(SLR1 { table })
+    }
+
+    gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag>
+    where
+        Il: Iterator<Item = Token<'input, Sl::Tag>>,
+    {
+        let mut driver = LRDriver::from(&self.table);
+        while !driver.accepted() {
+            for event in driver.consume(lexer.next()).collect::<Vec<_>>() {
+                yield event;
+            }
+        }
+    }
+}
+
+pub struct SLR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>
+{
+    _phantom_t: PhantomData<T>,
+    _phantom_r: PhantomData<R>,
+}
+
+impl<T, R> SLR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn try_from<Sp>(source_p: Sp) -> anyhow::Result<LRTable<T, R>>
+    where
+        Sp: ParseSource<T, Tag = R>,
+    {
+        // Add the top-level dummy rule and update the RuleSet
+        let mut ruleset = source_p.into_ruleset();
+        let top_dummy = Rule::new(
+            None,
+            RuleElem::new_nonterm("__top_dummy"),
+            vec![RuleElem::new_nonterm(&ruleset.top)],
+        );
+        ruleset.update_top(top_dummy.clone());
+
+        // Build the Follow set
+        let follow_set = FollowSet::from(&ruleset);
+
+        // Build the LR(0) automaton
+        let dfa = LR0DFA::from(&ruleset);
+
+        // Build the SLR(1) parse table
+        let mut builder = LRTableBuilder::from(&dfa);
+        for node in dfa.nodes {
+            let node = node.read().unwrap();
+            if let Some(rule) = node.find_all_by(is_slr1_reduce_state).next() {
+                // Mark Accept for the node containing "S -> Top ."
+                if let Some(_) = node.find_all(&top_dummy).next() {
+                    builder.set(node.id, None, LRAction::Accept);
+                    continue;
+                }
+
+                // Mark Reduce for nodes containing "A -> α β ."
+                let lhs = lhs_as_str(&rule.lhs);
+                for term in follow_set.get(lhs).unwrap() {
+                    match term {
+                        RuleElem::Term(term) => {
+                            builder.set(node.id, Some(*term), LRAction::Reduce(rule.clone()));
+                        }
+                        RuleElem::EOF => {
+                            builder.set(node.id, None, LRAction::Reduce(rule.clone()));
+                        }
+                        _ => {}
+                    }
+                }
+            }
+        }
+        let table = builder.build();
+
+        Ok(table)
+    }
+}
+
+fn is_slr1_reduce_state<T, R>(item: &&LR0Item<T, R>) -> bool
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    item.check_next_elem().is_none()
+}
+
+fn lhs_as_str<T: TokenTag>(lhs: &RuleElem<T>) -> &str {
+    if let RuleElem::NonTerm(nt) = lhs {
+        nt.as_str()
+    } else {
+        unreachable!()
+    }
+}

diff --git a/crates/parse_lr_slr1/tests/simple.rs b/crates/parse_lr_slr1/tests/simple.rs
new file mode 100644
index 0000000..7211d75
--- /dev/null
+++ b/crates/parse_lr_slr1/tests/simple.rs
@@ -0,0 +1,100 @@
+use copager_core::{Grammar, Processor};
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleTag, Rule, RuleElem};
+use copager_lex::LexSource;
+use copager_lex_regex::RegexLexer;
+use copager_parse::ParseSource;
+use copager_parse_lr_slr1::SLR1;
+use copager_ir_void::Void;
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+enum TestToken {
+    #[default]
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\*")]
+    Mul,
+    #[token(text = r"/")]
+    Div,
+    #[token(text = r"\(")]
+    BracketL,
+    #[token(text = r"\)")]
+    BracketR,
+    #[token(text = r"[1-9][0-9]*")]
+    Num,
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+enum TestRule {
+    #[default]
+    #[rule("<expr> ::= <expr> Plus <term>")]
+    #[rule("<expr> ::= <expr> Minus <term>")]
+    #[rule("<expr> ::= <term>")]
+    Expr,
+    #[rule("<term> ::= <term> Mul <num>")]
+    #[rule("<term> ::= <term> Div <num>")]
+    #[rule("<term> ::= <num>")]
+    Term,
+    #[rule("<num> ::= BracketL <expr> BracketR")]
+    #[rule("<num> ::= Num")]
+    Num,
+}
+
+type TestGrammar = Grammar<TestToken, TestRule>;
+type TestLexer = RegexLexer<TestToken>;
+type TestParser = SLR1<TestToken, TestRule>;
+type TestProcessor = Processor<TestGrammar, TestLexer, TestParser>;
+
+#[test]
+fn simple_success() {
+    const OK_INPUTS: [&str; 10] = [
+        "10",
+        "10 + 20",
+        "10 - 20",
+        "10 * 20",
+        "10 / 20",
+        "10 + 20 * 30 - 40",
+        "(10)",
+        "((((10))))",
+        "10 * (20 - 30)",
+        "((10 + 20) * (30 / 40)) - 50",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &OK_INPUTS {
+        println!("input: {}", input);
+        processor.process::<Void>(input).unwrap();
+    }
+}
+
+#[test]
+fn simple_failure() {
+    const ERR_INPUTS: [&str; 7] = [
+        "()",
+        "(10 -",
+        "10 +",
+        "*",
+        "10 20 + 30",
+        "10 + 20 * 30 / 40 (",
+        "(((10))",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &ERR_INPUTS {
+        assert!(processor.process::<Void>(input).is_err(), "input: {}", input);
+    }
+}
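The only change relative to the LR(0) builder is the Reduce branch: a completed item A -> α • is reduced only on the terminals in Follow(A), with RuleElem::EOF routed to the EOF column. For the arithmetic grammar in the tests above, this is what dissolves the shift/reduce tension sketched earlier (a worked example, using this series' grammar names):

    state with items:   <expr> ::= <term> •    and    <term> ::= <term> • Mul <num>
    Follow(<expr>)   =  { Plus, Minus, BracketR, $ }   -- contains neither Mul nor Div
    action[state, Mul]  = Shift                        (from the automaton edge)
    action[state, Plus] = Reduce <expr> ::= <term>     (from the Follow set)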
From 814803abc4d85d566403460dae81515fb2eeeb34 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Tue, 12 Nov 2024 08:35:12 +0900
Subject: [PATCH 33/48] [fix] Partially fix the parse_lr_lr0 test code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/parse_lr_lr0/tests/simple.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/parse_lr_lr0/tests/simple.rs b/crates/parse_lr_lr0/tests/simple.rs
index dc1e410..1d78194 100644
--- a/crates/parse_lr_lr0/tests/simple.rs
+++ b/crates/parse_lr_lr0/tests/simple.rs
@@ -61,7 +61,8 @@ fn simple_success() {
         .unwrap();
 
     for input in &OK_INPUTS {
-        assert!(processor.process::<Void>(input).is_ok(), "input: {}", input);
+        println!("input: {}", input);
+        processor.process::<Void>(input).unwrap();
     }
 }

From 885f9104ae0291157ef503cef9e6ced405195e95 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Tue, 12 Nov 2024 09:08:31 +0900
Subject: [PATCH 34/48] [update] Change the argument type of FirstSet::get to
 RuleElem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/parse_common/src/rule/director.rs |  8 +--
 crates/parse_common/src/rule/first.rs    | 64 +++++++++++-------------
 crates/parse_common/src/rule/follow.rs   |  8 +--
 3 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/crates/parse_common/src/rule/director.rs b/crates/parse_common/src/rule/director.rs
index 2cdd4cf..1ffb1a4 100644
--- a/crates/parse_common/src/rule/director.rs
+++ b/crates/parse_common/src/rule/director.rs
@@ -106,10 +106,10 @@ where
         if relems.is_empty() {
             vec![&RuleElem::Epsilon]
         } else {
-            match &relems[0] {
-                RuleElem::NonTerm(s) => self.first_set.get(s.as_str()).unwrap().to_vec(),
-                t@RuleElem::Term(_) => vec![t],
-                _ => vec![],
+            if let Some(first) = self.first_set.get(&relems[0]) {
+                first.to_vec()
+            } else {
+                vec![]
             }
         }
     }

diff --git a/crates/parse_common/src/rule/first.rs b/crates/parse_common/src/rule/first.rs
index d0eb5fd..dc79ae5 100644
--- a/crates/parse_common/src/rule/first.rs
+++ b/crates/parse_common/src/rule/first.rs
@@ -9,7 +9,7 @@ where
     T: TokenTag,
     R: RuleTag<T>,
 {
-    map: HashMap<String, Vec<&'a RuleElem<T>>>,
+    map: HashMap<&'a RuleElem<T>, Vec<&'a RuleElem<T>>>,
     _phantom: PhantomData<R>,
 }
@@ -37,8 +37,8 @@ where
     T: TokenTag,
     R: RuleTag<T>,
 {
-    pub fn get(&self, nonterm: &str) -> Option<&[&'a RuleElem<T>]> {
-        self.map.get(nonterm).map(|terms| terms.as_slice())
+    pub fn get(&self, relem: &RuleElem<T>) -> Option<&[&'a RuleElem<T>]> {
+        self.map.get(relem).map(|terms| terms.as_slice())
     }
 }
@@ -47,9 +47,9 @@ where
     T: TokenTag,
     R: RuleTag<T>,
 {
-    map: HashMap<String, HashSet<&'a RuleElem<T>>>,
+    map: HashMap<&'a RuleElem<T>, HashSet<&'a RuleElem<T>>>,
     ruleset: &'a RuleSet<T, R>,
-    nonterms: Vec<&'a str>,
+    nonterms: Vec<&'a RuleElem<T>>,
 }
 
 impl<'a, T, R> From<&'a RuleSet<T, R>> for FirstSetBuilder<'a, T, R>
@@ -59,20 +59,15 @@ where
 {
     fn from(ruleset: &'a RuleSet<T, R>) -> Self {
         let mut map = HashMap::new();
-        for nonterm in ruleset.nonterms() {
-            if let RuleElem::NonTerm(nonterm) = nonterm {
-                map.insert(nonterm.clone(), HashSet::new());
-            }
-        }
+        ruleset.nonterms().iter().for_each(|&nonterm| {
+            map.insert(nonterm, HashSet::new());
+        });
+        ruleset.terms().iter().for_each(|&term| {
+            map.insert(term, HashSet::new());
+            map.get_mut(term).unwrap().insert(term);
+        });
 
-        let nonterms = ruleset.nonterms();
-        let nonterms = nonterms
-            .iter()
-            .map(|relem| match relem {
-                RuleElem::NonTerm(nonterm) => nonterm.as_str(),
-                _ => unreachable!(),
-            })
-            .collect::<Vec<_>>();
+        let nonterms = ruleset.nonterms().into_iter().collect();
 
         FirstSetBuilder {
             map,
@@ -97,12 +92,11 @@ where
         for &nonterm in &self.nonterms {
             let old_len = self.map.get(nonterm).unwrap().len();
             for first_symbol in rhs_first_symbol(self.ruleset, nonterm) {
-                match first_symbol {
-                    RuleElem::NonTerm(first_nonterm) => {
-                        let cand_terms = self.map.get(first_nonterm).unwrap().clone();
-                        self.map.get_mut(nonterm).unwrap().extend(cand_terms);
-                    },
-                    _ => { self.map.get_mut(nonterm).unwrap().insert(first_symbol); }
+                if matches!(first_symbol, RuleElem::NonTerm(_)) {
+                    let cand_terms = self.map.get(first_symbol).unwrap().clone();
+                    self.map.get_mut(nonterm).unwrap().extend(cand_terms);
+                } else {
+                    self.map.get_mut(nonterm).unwrap().insert(first_symbol);
                 }
             }
             modified |= old_len != self.map.get(nonterm).unwrap().len();
 
-fn rhs_first_symbol<'a, T, R>(ruleset: &'a RuleSet<T, R>, nonterm: &str) -> impl Iterator<Item = &'a RuleElem<T>>
+fn rhs_first_symbol<'a, T, R>(ruleset: &'a RuleSet<T, R>, nonterm: &RuleElem<T>) -> impl Iterator<Item = &'a RuleElem<T>>
 where
     T: TokenTag,
     R: RuleTag<T>,
 {
-    let cmp_nonterm = |relem: &RuleElem<T>, lhs: &str| match relem {
-        RuleElem::NonTerm(nonterm) => nonterm == lhs,
-        _ => false,
-    };
-
     ruleset.rules
         .iter()
-        .filter(move |rule| cmp_nonterm(&rule.lhs, nonterm))
+        .filter(move |&rule| &rule.lhs == nonterm)
         .flat_map(|rule| rule.rhs.first())
 }
@@ -177,22 +166,25 @@ mod test {
     #[test]
     fn first_set() {
         macro_rules! term {
-            ($expr:ident) => { RuleElem::new_term(TestToken::$expr) };
+            ($ident:ident) => { RuleElem::new_term(TestToken::$ident) };
+        }
+        macro_rules! nonterm {
+            ($expr:expr) => { RuleElem::new_nonterm($expr) };
         }
 
         let ruleset = TestRule::default().into_ruleset();
         let first_set = FirstSet::from(&ruleset);
 
         let expected = vec![term!(A)];
-        assert!(eq_symbols(first_set.get("S").unwrap(), expected.as_slice()));
+        assert!(eq_symbols(first_set.get(&nonterm!("S")).unwrap(), expected.as_slice()));
 
         let expected = vec![term!(A)];
-        assert!(eq_symbols(first_set.get("A").unwrap(), expected.as_slice()));
+        assert!(eq_symbols(first_set.get(&nonterm!("A")).unwrap(), expected.as_slice()));
 
         let expected = vec![term!(A)];
-        assert!(eq_symbols(first_set.get("B").unwrap(), expected.as_slice()));
+        assert!(eq_symbols(first_set.get(&nonterm!("B")).unwrap(), expected.as_slice()));
 
         let expected = vec![RuleElem::Epsilon];
-        assert!(eq_symbols(first_set.get("C").unwrap(), expected.as_slice()));
+        assert!(eq_symbols(first_set.get(&nonterm!("C")).unwrap(), expected.as_slice()));
     }
 }

diff --git a/crates/parse_common/src/rule/follow.rs b/crates/parse_common/src/rule/follow.rs
index c4e8b4b..01cf734 100644
--- a/crates/parse_common/src/rule/follow.rs
+++ b/crates/parse_common/src/rule/follow.rs
@@ -136,10 +136,10 @@ where
         if relems.is_empty() {
             vec![&RuleElem::Epsilon]
         } else {
-            match &relems[0] {
-                RuleElem::NonTerm(s) => first_set.get(s.as_str()).unwrap().to_vec(),
-                t@RuleElem::Term(_) => vec![t],
-                _ => unreachable!(),
+            if let Some(first) = first_set.get(&relems[0]) {
+                first.to_vec()
+            } else {
+                vec![]
             }
         }
     }

From 035cb49dd253e296053cb1bca7a0b54abda92880 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Tue, 12 Nov 2024 09:12:26 +0900
Subject: [PATCH 35/48] [add] Add FirstSet::get_by
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/parse_common/src/rule/director.rs | 14 +-------------
 crates/parse_common/src/rule/first.rs    | 10 ++++++++++
 crates/parse_common/src/rule/follow.rs   | 18 +-----------------
 3 files changed, 12 insertions(+), 30 deletions(-)

diff --git a/crates/parse_common/src/rule/director.rs b/crates/parse_common/src/rule/director.rs
index 1ffb1a4..360d7a8 100644
--- a/crates/parse_common/src/rule/director.rs
+++ b/crates/parse_common/src/rule/director.rs
@@ -86,7 +86,7 @@ where
             _ => unreachable!(),
         };
 
-        let rhs_firsts = self.first_by(&rule.rhs);
+        let rhs_firsts = self.first_set.get_by(&rule.rhs);
         let cand_elems = if !rhs_firsts.contains(&&RuleElem::Epsilon) {
             rhs_firsts
         } else {
@@ -101,18 +101,6 @@ where
             .collect();
         self.map.insert(rule, director_elems);
     }
-
-    fn first_by(&self, relems: &'a [RuleElem<T>]) -> Vec<&'a RuleElem<T>> {
-        if relems.is_empty() {
-            vec![&RuleElem::Epsilon]
-        } else {
-            if let Some(first) = self.first_set.get(&relems[0]) {
-                first.to_vec()
-            } else {
-                vec![]
-            }
-        }
-    }
 }

diff --git a/crates/parse_common/src/rule/first.rs b/crates/parse_common/src/rule/first.rs
index dc79ae5..fdae9be 100644
--- a/crates/parse_common/src/rule/first.rs
+++ b/crates/parse_common/src/rule/first.rs
@@ -40,6 +40,16 @@ where
     pub fn get(&self, relem: &RuleElem<T>) -> Option<&[&'a RuleElem<T>]> {
         self.map.get(relem).map(|terms| terms.as_slice())
     }
+
+    pub fn get_by(&self, relems: &'a [RuleElem<T>]) -> Vec<&'a RuleElem<T>> {
+        if relems.is_empty() {
+            vec![&RuleElem::Epsilon]
+        } else if let Some(first) = self.map.get(&relems[0]) {
+            first.to_vec()
+        } else {
+            vec![]
+        }
+    }
 }

diff --git a/crates/parse_common/src/rule/follow.rs b/crates/parse_common/src/rule/follow.rs
index 01cf734..f637c31 100644
--- a/crates/parse_common/src/rule/follow.rs
+++ b/crates/parse_common/src/rule/follow.rs
@@ -95,7 +95,7 @@ where
             for rhs_idx in 0..rule.rhs.len() {
                 let target = &rule.rhs[rhs_idx];
                 let follow_symbols = &rule.rhs[rhs_idx+1..];
-                let prob_first_symbols = first_by(&first_set, follow_symbols);
+                let prob_first_symbols = first_set.get_by(follow_symbols);
                 modified |= self.append_by_first(target, &prob_first_symbols);
                 if prob_first_symbols.contains(&&RuleElem::Epsilon) {
                     modified |= self.append_when_nullable(target, lhs);
@@ -128,22 +128,6 @@ where
     }
 }
 
-fn first_by<'a, T, R>(first_set: &FirstSet<'a, T, R>, relems: &'a [RuleElem<T>]) -> Vec<&'a RuleElem<T>>
-where
-    T: TokenTag,
-    R: RuleTag<T>,
-{
-    if relems.is_empty() {
-        vec![&RuleElem::Epsilon]
-    } else {
-        if let Some(first) = first_set.get(&relems[0]) {
-            first.to_vec()
-        } else {
-            vec![]
-        }
-    }
-}
-
 #[cfg(test)]
 mod test {
     use copager_cfg::token::TokenTag;

From 33a1816df60eb787dcadaf29539145c3e70ba241 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Tue, 12 Nov 2024 11:55:11 +0900
Subject: [PATCH 36/48] [fix] Fix the behavior of FirstSet::get_by
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/parse_common/src/rule/director.rs |  4 ++--
 crates/parse_common/src/rule/first.rs    | 20 +++++++++++++++-----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/crates/parse_common/src/rule/director.rs b/crates/parse_common/src/rule/director.rs
index 360d7a8..a80f7d0 100644
--- a/crates/parse_common/src/rule/director.rs
+++ b/crates/parse_common/src/rule/director.rs
@@ -86,11 +86,11 @@ where
             _ => unreachable!(),
         };
 
-        let rhs_firsts = self.first_set.get_by(&rule.rhs);
+        let rhs_firsts = self.first_set.get_by(&rule.rhs).to_vec();
         let cand_elems = if !rhs_firsts.contains(&&RuleElem::Epsilon) {
             rhs_firsts
         } else {
-            let mut cand_elems = rhs_firsts;
+            let mut cand_elems = rhs_firsts.to_vec();
             cand_elems.extend_from_slice(self.follow_set.get(&lhs).unwrap());
             cand_elems
         };

diff --git a/crates/parse_common/src/rule/first.rs b/crates/parse_common/src/rule/first.rs
index fdae9be..dd3dbd6 100644
--- a/crates/parse_common/src/rule/first.rs
+++ b/crates/parse_common/src/rule/first.rs
@@ -41,13 +41,21 @@ where
         self.map.get(relem).map(|terms| terms.as_slice())
     }
 
-    pub fn get_by(&self, relems: &'a [RuleElem<T>]) -> Vec<&'a RuleElem<T>> {
+    pub fn get_by(&self, relems: &[RuleElem<T>]) -> Vec<&'a RuleElem<T>> {
         if relems.is_empty() {
-            vec![&RuleElem::Epsilon]
+            vec![&RuleElem::EOF]
         } else {
+            let mut firsts: HashSet<&'a RuleElem<T>> = HashSet::new();
+            for relem in relems {
+                let first_candidates = self.map.get(relem).unwrap();
+                firsts.extend(first_candidates);
+                if firsts.contains(&RuleElem::Epsilon) {
+                    firsts.remove(&RuleElem::Epsilon);
+                    continue
+                }
+                break
+            }
+            firsts.into_iter().collect()
         }
     }
@@ -76,6 +84,8 @@ where
             map.insert(term, HashSet::new());
             map.get_mut(term).unwrap().insert(term);
         });
+        map.insert(&RuleElem::EOF, HashSet::new());
+        map.get_mut(&RuleElem::EOF).unwrap().insert(&RuleElem::EOF);
 
         let nonterms = ruleset.nonterms().into_iter().collect();
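After this fix, get_by computes FIRST of an entire symbol string rather than of its first symbol only: it accumulates FIRST of each symbol, continuing rightwards only while the prefix seen so far is nullable, and the empty string now maps to $ rather than ε, since callers hand it tails that end in a lookahead/end marker. Schematically:

    FIRST(X β) = FIRST(X)                        if ε ∉ FIRST(X)
    FIRST(X β) = (FIRST(X) \ {ε}) ∪ FIRST(β)     if ε ∈ FIRST(X)
    FIRST([])  = { $ }                           (get_by's empty-string fallback)

This is exactly the shape the LR(1) item closure in the next patch relies on when it computes lookaheads from FIRST(β a).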
From c70120dafe4bf274d8b3bcd5309b4e2765830891 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Tue, 12 Nov 2024 11:56:41 +0900
Subject: [PATCH 37/48] [add] Create LR1DFA, LR1Item, and related types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/parse_lr_common/Cargo.toml      |   1 +
 crates/parse_lr_common/src/lib.rs      |   1 +
 crates/parse_lr_common/src/lr0/item.rs |   5 +-
 crates/parse_lr_common/src/lr1.rs      |   4 +
 crates/parse_lr_common/src/lr1/dfa.rs  | 225 ++++++++++++++++++++++++
 crates/parse_lr_common/src/lr1/item.rs | 230 +++++++++++++++++++++++++
 crates/parse_lr_common/src/table.rs    |   6 +-
 7 files changed, 465 insertions(+), 7 deletions(-)
 create mode 100644 crates/parse_lr_common/src/lr1.rs
 create mode 100644 crates/parse_lr_common/src/lr1/dfa.rs
 create mode 100644 crates/parse_lr_common/src/lr1/item.rs

diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml
index 06abb98..c567405 100644
--- a/crates/parse_lr_common/Cargo.toml
+++ b/crates/parse_lr_common/Cargo.toml
@@ -10,3 +10,4 @@ anyhow = { workspace = true }
 thiserror = { workspace = true }
 copager_cfg = { path = "../cfg" }
 copager_parse = { path = "../parse" }
+copager_parse_common = { path = "../parse_common" }

diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs
index dd25121..c512803 100644
--- a/crates/parse_lr_common/src/lib.rs
+++ b/crates/parse_lr_common/src/lib.rs
@@ -4,3 +4,4 @@ mod automaton;
 pub mod table;
 pub mod driver;
 pub mod lr0;
+pub mod lr1;

diff --git a/crates/parse_lr_common/src/lr0/item.rs b/crates/parse_lr_common/src/lr0/item.rs
index ebe2489..1cf6449 100644
--- a/crates/parse_lr_common/src/lr0/item.rs
+++ b/crates/parse_lr_common/src/lr0/item.rs
@@ -150,10 +150,7 @@ where
     R: RuleTag<T>,
 {
     pub fn init(mut self, rule: &'a Rule<T, R>) -> Self {
-        let new_item = LR0Item::from(rule);
-        if !self.items.contains(&new_item) {
-            self.items.push(new_item);
-        }
+        self.items = vec![LR0Item::from(rule)];
         self
     }

diff --git a/crates/parse_lr_common/src/lr1.rs b/crates/parse_lr_common/src/lr1.rs
new file mode 100644
index 0000000..6e822ac
--- /dev/null
+++ b/crates/parse_lr_common/src/lr1.rs
@@ -0,0 +1,4 @@
+pub mod item;
+pub mod dfa;
+
+pub use dfa::LR1DFA;

diff --git a/crates/parse_lr_common/src/lr1/dfa.rs b/crates/parse_lr_common/src/lr1/dfa.rs
new file mode 100644
index 0000000..4a0d157
--- /dev/null
+++ b/crates/parse_lr_common/src/lr1/dfa.rs
@@ -0,0 +1,225 @@
+use std::collections::{HashMap, BTreeMap};
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::rc::Rc;
+use std::sync::RwLock;
+use std::marker::PhantomData;
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
+use copager_parse_common::rule::FirstSet;
+
+use crate::automaton::Automaton;
+use crate::lr1::item::{LR1Item, LR1ItemSet};
+
+#[derive(Clone)]
+pub struct LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub id: usize,
+    pub itemset: LR1ItemSet<'a, 'b, T, R>,
+    pub next: Vec<(&'a RuleElem<T>, Rc<RwLock<Self>>)>, // (cond, next_node)
+}
+
+impl<'a, 'b, T, R> Debug for LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        #[derive(Debug)]
+        #[allow(dead_code)]
+        struct LR1DFANode<'a, 'b, 'c, T, R>
+        where
+            T: TokenTag,
+            R: RuleTag<T>,
+        {
+            id: usize,
+            itemset: &'c LR1ItemSet<'a, 'b, T, R>,
+            next: Vec<(&'a RuleElem<T>, usize)>,
+        }
+
+        let id = self.id;
+        let itemset = &self.itemset;
+        let next = self.next
+            .iter()
+            .map(|(cond, next_node)| (*cond, next_node.read().unwrap().id))
+            .collect::<Vec<_>>();
+
+        if f.alternate() {
+            return write!(f, "{:#?}", LR1DFANode { id, itemset, next });
+        } else {
+            write!(f, "{:?}", LR1DFANode { id, itemset, next })
+        }
+    }
+}
+
+impl<'a, 'b, T, R> Hash for LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.id.hash(state);
+        self.itemset.hash(state);
+    }
+}
+
+impl<'a, 'b, T, R> PartialEq for LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn eq(&self, other: &Self) -> bool {
+        self.id == other.id && self.itemset == other.itemset
+    }
+}
+
+impl<'a, 'b, T, R> Eq for LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{}
+
+impl<'a, 'b, T, R> LR1DFANode<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn find_all(&self, rule: &Rule<T, R>) -> impl Iterator<Item = (&'a Rule<T, R>, &'a RuleElem<T>)> {
+        self.find_all_by(move |item| item.rule == rule)
+    }
+
+    pub fn find_all_by<F>(&self, cond: F) -> impl Iterator<Item = (&'a Rule<T, R>, &'a RuleElem<T>)>
+    where
+        F: Fn(&&LR1Item<'a, T, R>) -> bool
+    {
+        self.itemset
+            .items
+            .iter()
+            .filter(cond)
+            .map(|item| (item.rule, item.la_token))
+    }
+}
+
+#[derive(Debug)]
+pub struct LR1DFA<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub nodes: Vec<Rc<RwLock<LR1DFANode<'a, 'b, T, R>>>>,
+    pub edges: Vec<(usize, usize, &'a RuleElem<T>)>,
+}
+
+impl<'a, 'b, T, R> From<(&'a RuleSet<T, R>, &'b FirstSet<'a, T, R>)> for LR1DFA<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from((ruleset, first_set): (&'a RuleSet<T, R>, &'b FirstSet<'a, T, R>)) -> Self {
+        let dfa_top = LR1DFABuilder::new().start(ruleset, &first_set);
+
+        let mut nodes = BTreeMap::new();
+        let mut edges = vec![];
+        let mut stack = vec![dfa_top];
+        while let Some(node) = stack.pop() {
+            let from = node.read().unwrap().id;
+            if nodes.contains_key(&from) {
+                continue;
+            }
+            for (cond, next_node) in &node.read().unwrap().next {
+                let to = next_node.read().unwrap().id;
+                edges.push((from, to, *cond));
+                stack.push(Rc::clone(next_node));
+            }
+            nodes.insert(from, Rc::clone(&node));
+        }
+
+        let nodes = nodes
+            .into_iter()
+            .map(|(_, node)| node)
+            .collect();
+
+        LR1DFA { nodes, edges }
+    }
+}
+
+impl<'a: 'b, 'b, T, R> Automaton<'a, 'b, T> for LR1DFA<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn len(&self) -> usize {
+        self.nodes.len()
+    }
+
+    fn edges(&'b self) -> impl Iterator<Item = &'b (usize, usize, &'a RuleElem<T>)> {
+        self.edges.iter()
+    }
+}
+
+#[derive(Debug)]
+struct LR1DFABuilder<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    itemsets: HashMap<LR1ItemSet<'a, 'b, T, R>, Rc<RwLock<LR1DFANode<'a, 'b, T, R>>>>,
+    _phantom_t: PhantomData<T>,
+    _phantom_r: PhantomData<R>,
+}
+
+impl<'a, 'b, T, R> LR1DFABuilder<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn new() -> Self {
+        LR1DFABuilder {
+            itemsets: HashMap::new(),
+            _phantom_t: PhantomData,
+            _phantom_r: PhantomData,
+        }
+    }
+
+    fn start(mut self, ruleset: &'a RuleSet<T, R>, first_set: &'b FirstSet<'a, T, R>) -> Rc<RwLock<LR1DFANode<'a, 'b, T, R>>> {
+        let top = RuleElem::NonTerm(ruleset.top.clone());
+        let top = ruleset.rules
+            .iter()
+            .find(|rule| rule.lhs == top)
+            .unwrap();
+        let top = LR1ItemSet::from((ruleset, first_set)).init(top);
+
+        self.gen_recursive(top)
+    }
+
+    fn gen_recursive(&mut self, mut itemset: LR1ItemSet<'a, 'b, T, R>) -> Rc<RwLock<LR1DFANode<'a, 'b, T, R>>>
+    where
+        T: TokenTag,
+    {
+        if let Some(node) = self.itemsets.get(&itemset) {
+            return Rc::clone(node);
+        }
+
+        let id = self.itemsets.len();
+        let node = LR1DFANode { id, itemset: itemset.clone(), next: vec![] };
+        let node = Rc::new(RwLock::new(node));
+        self.itemsets.insert(itemset.clone(), Rc::clone(&node));
+
+        let mut next = vec![];
+        for (cond, nextset) in itemset.gen_next_sets() {
+            next.push((cond, self.gen_recursive(nextset)));
+        }
+        node.write().unwrap().next = next;
+
+        Rc::clone(&node)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}

diff --git a/crates/parse_lr_common/src/lr1/item.rs b/crates/parse_lr_common/src/lr1/item.rs
new file mode 100644
index 0000000..7c14247
--- /dev/null
+++ b/crates/parse_lr_common/src/lr1/item.rs
@@ -0,0 +1,230 @@
+use std::collections::{HashMap, HashSet};
+use std::fmt::{Display, Debug};
+use std::hash::Hash;
+
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag};
+use copager_parse_common::rule::FirstSet;
+
+#[derive(Clone, Hash, PartialEq, Eq)]
+pub struct LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub rule: &'a Rule<T, R>,
+    pub dot_pos: usize,
+    pub la_token: &'a RuleElem<T>,
+}
+
+impl<'a, T, R> Display for LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} -> ", self.rule.lhs)?;
+        for (i, elem) in self.rule.rhs.iter().enumerate() {
+            if i == self.dot_pos {
+                write!(f, "• ")?;
+            }
+            write!(f, "{} ", elem)?;
+        }
+        if self.dot_pos == self.rule.rhs.len() {
+            write!(f, "•")?;
+        }
+        write!(f, "[{}]", self.la_token)
+    }
+}
+
+impl<'a, T, R> Debug for LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self)
+    }
+}
+
+impl<'a, T, R> From<(&'a Rule<T, R>, &'a RuleElem<T>)> for LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from((rule, la_token): (&'a Rule<T, R>, &'a RuleElem<T>)) -> Self {
+        if rule.rhs[0] == RuleElem::Epsilon {
+            LR1Item { rule, dot_pos: 1, la_token: &RuleElem::EOF }
+        } else {
+            LR1Item { rule, dot_pos: 0, la_token }
+        }
+    }
+}
+
+impl<'a, T, R> LR1Item<'a, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn gen_next(&self) -> Self {
+        assert!(self.dot_pos + 1 <= self.rule.rhs.len());
+        LR1Item {
+            rule: self.rule,
+            dot_pos: self.dot_pos + 1,
+            la_token: self.la_token,
+        }
+    }
+
+    pub fn check_next_elem(&self) -> Option<&'a RuleElem<T>> {
+        if self.dot_pos < self.rule.rhs.len() {
+            Some(&self.rule.rhs[self.dot_pos])
+        } else {
+            None
+        }
+    }
+
+    pub fn check_next_elems<'b>(&'b self) -> Vec<RuleElem<T>> {
+        let mut next_elems = Vec::from(&self.rule.rhs[self.dot_pos..]);
+        next_elems.push(self.la_token.clone());
+        next_elems
+    }
+}
+
+#[derive(Clone)]
+pub struct LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub items: Vec<LR1Item<'a, T, R>>,
+    ruleset: &'a RuleSet<T, R>,
+    first_set: &'b FirstSet<'a, T, R>,
+}
+
+impl<'a, 'b, T, R> Debug for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if f.alternate() {
+            write!(f, "{:#?}", self.items)
+        } else {
+            write!(f, "{:?}", self.items)
+        }
+    }
+}
+
+impl<'a, 'b, T, R> From<(&'a RuleSet<T, R>, &'b FirstSet<'a, T, R>)> for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn from((ruleset, first_set): (&'a RuleSet<T, R>, &'b FirstSet<'a, T, R>)) -> Self {
+        LR1ItemSet {
+            items: vec![],
+            ruleset,
+            first_set,
+        }
+    }
+}
+
+impl<'a, 'b, T, R> Hash for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.items.hash(state);
+    }
+}
+
+impl<'a, 'b, T, R> PartialEq for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn eq(&self, other: &Self) -> bool {
+        self.items == other.items
+    }
+}
+
+impl <'a, 'b, T, R> Eq for LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{}
+
+impl<'a, 'b, T, R> LR1ItemSet<'a, 'b, T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    pub fn init(mut self, rule: &'a Rule<T, R>) -> Self {
+        self.items = vec![LR1Item::from((rule, &RuleElem::EOF))];
+        self
+    }
+
+    pub fn gen_next_sets(&mut self) -> impl Iterator<Item = (&'a RuleElem<T>, LR1ItemSet<'a, 'b, T, R>)> {
+        self.expand();
+
+        let mut next_set_candidates = HashMap::new();
+        self.items
+            .iter()
+            .filter_map(|item| item.check_next_elem().map(|nelem| (nelem, item)))
+            .for_each(|(nelem, item) | {
+                next_set_candidates
+                    .entry(nelem)
+                    .or_insert_with(HashSet::new)
+                    .insert(item.gen_next());
+            });
+
+        next_set_candidates
+            .into_iter()
+            .map(|(cond, items)| {
+                let items = items.into_iter().collect();
+                (cond, LR1ItemSet { items, ruleset: self.ruleset, first_set: self.first_set })
+            })
+    }
+
+    fn expand(&mut self) {
+        let mut modified = true;
+        while modified {
+            modified = false;
+            let new_expaned = self.items
+                .iter()
+                .flat_map(|item| self.expand_once(item))
+                .flatten()
+                .collect::<Vec<_>>();
+            for item in new_expaned {
+                if self.items.contains(&item) {
+                    continue;
+                }
+                self.items.push(item);
+                modified = true;
+            }
+        }
+    }
+
+    fn expand_once(&self, item: &LR1Item<'a, T, R>) -> Option<impl Iterator<Item = LR1Item<'a, T, R>>> {
+        if let Some(nonterm@RuleElem::NonTerm(..)) = item.check_next_elem() {
+            Some(self.ruleset
+                .find_rule(nonterm)
+                .into_iter()
+                .flat_map(|rule| {
+                    let next_elems = item.check_next_elems();
+                    self.first_set
+                        .get_by(&next_elems[1..])
+                        .into_iter()
+                        .map(move |la_token| LR1Item::from((rule, la_token)))
+                }))
+        } else {
+            None
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}

diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs
index 0b4f857..d27b63d 100644
--- a/crates/parse_lr_common/src/table.rs
+++ b/crates/parse_lr_common/src/table.rs
@@ -23,9 +23,9 @@ where
     T: TokenTag,
     R: RuleTag<T>,
 {
-    action_table: Vec<HashMap<T, LRAction<T, R>>>,
-    eof_action_table: Vec<LRAction<T, R>>,
-    goto_table: Vec<HashMap<String, usize>>,
+    pub action_table: Vec<HashMap<T, LRAction<T, R>>>,
+    pub eof_action_table: Vec<LRAction<T, R>>,
+    pub goto_table: Vec<HashMap<String, usize>>,
 }
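The heart of the LR(1) construction is expand_once: closing over an item A -> α • B β with lookahead a adds B -> • γ for every lookahead in FIRST(β a). That is why check_next_elems appends la_token to the tail before get_by is consulted, and why get_by had to learn to walk through nullable prefixes in patch 36. A worked instance (standard construction, names illustrative):

    item:   S ::= A • B C   [$]        (so β = C and the lookahead a = $)
    rule:   B ::= b
    FIRST(C $) = FIRST(C)                      if C is not nullable
               = (FIRST(C) \ {ε}) ∪ { $ }      if C can derive ε
    added:  B ::= • b  [x]   for every terminal x in FIRST(C $)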
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itoa" version = "1.0.11" diff --git a/crates/parse_lr_lr1/Cargo.toml b/crates/parse_lr_lr1/Cargo.toml index c5093de..8698ac6 100644 --- a/crates/parse_lr_lr1/Cargo.toml +++ b/crates/parse_lr_lr1/Cargo.toml @@ -8,16 +8,15 @@ edition = "2024" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } -serde = { workspace = true } -itertools = "0.12.1" -copager_core = { path = "../core" } copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } -copager_utils = { path = "../utils" } +copager_parse_common = { path = "../parse_common" } +copager_parse_lr_common = { path = "../parse_lr_common" } [dev-dependencies] +copager_core = { path = "../core" } copager_lex = { path = "../lex", features = ["derive"] } -copager_lex_regex = { path = "../lex_regex" } +copager_lex_regex = { path = "../lex_regex" } copager_parse = { path = "../parse", features = ["derive"] } -copager_parse_lr_lr1 = { path = "./" } +copager_ir_void = { path = "../ir_void" } diff --git a/crates/parse_lr_lr1/src/builder.rs b/crates/parse_lr_lr1/src/builder.rs deleted file mode 100644 index 05b6b5e..0000000 --- a/crates/parse_lr_lr1/src/builder.rs +++ /dev/null @@ -1,440 +0,0 @@ -use std::collections::{HashMap, HashSet}; -use std::marker::PhantomData; -use std::hash::Hash; - -use itertools::Itertools; -use serde::{Serialize, Deserialize}; - -use copager_cfg::token::TokenTag; -use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; -use copager_lex::LexSource; -use copager_parse::ParseSource; - -#[derive(Debug, Serialize, Deserialize)] -pub enum LRAction { - Shift(usize), - Reduce(R, usize, usize), // tag, goto_id, elems_cnt - Accept, - None, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct LR1Configure -where - Sl: LexSource, - Sp: ParseSource, -{ - #[serde(bound( - serialize = "Sl::Tag: Serialize, Sp::Tag: Serialize", - deserialize = "Sl::Tag: Deserialize<'de>, Sp::Tag: Deserialize<'de>", - ))] - pub action_table: Vec>>, - pub eof_action_table: Vec>, - pub goto_table: Vec>, -} - -impl LR1Configure -where - Sl: LexSource, - Sp: ParseSource, -{ - pub fn new(source_l: &Sl, source_p: &Sp) -> anyhow::Result { - // 1. Pre-process - let ruleset = source_p.into_ruleset(); - let first_set = ruleset.first_set(); - - // 2. Generate dummy nonterm - let top_dummy: Rule = Rule::new( - None, - RuleElem::new_nonterm("__top_dummy"), - vec![RuleElem::new_nonterm(&ruleset.top)], - ); - let top_dummy = vec![LRItem::new( - &top_dummy, - HashSet::from_iter(vec![&RuleElem::EOF]), - )]; - let lr_items = LRItemSet::new(0, HashSet::from_iter(top_dummy)); - let lr_items = lr_items.expand_closure(&ruleset, &first_set); - - // 3. Generate a DFA - let dfa = LRItemDFA::r#gen(lr_items, &ruleset, &first_set); - - // 4. 
Initialize tables - let mut idx = 0; - let mut nonterm_table = HashMap::new(); - for relem in ruleset.nonterms() { - if let RuleElem::NonTerm(s) = &relem { - if !nonterm_table.contains_key(s) { - nonterm_table.insert(s.to_string(), idx); - idx += 1; - } - } - } - - let mut action_table: Vec>> = Vec::with_capacity(dfa.sets.len()); - let mut eof_action_table: Vec> = Vec::with_capacity(dfa.sets.len()); - let mut goto_table: Vec> = Vec::with_capacity(dfa.sets.len()); - for _ in 0..dfa.sets.len() { - action_table.push(HashMap::from_iter( - source_l.iter() - .map(|token| (token, LRAction::None)) - .collect::)>>(), - )); - eof_action_table.push(LRAction::None); - goto_table.push(vec![0; nonterm_table.keys().len()]); - } - - // 5. Setup tables - let rule_tags = source_p.iter().collect::>(); - for lritem_set in &dfa.sets { - for (token, next) in &lritem_set.next { - match &token { - RuleElem::NonTerm(s) => { - let id = lritem_set.id as usize; - let label = *nonterm_table.get(s).unwrap(); - goto_table[id][label] = *next as usize; - } - RuleElem::Term(t) => { - let id = lritem_set.id as usize; - let label = action_table[id].get_mut(t).unwrap(); - *label = LRAction::Shift(*next as usize); - } - _ => {} - } - } - - for item in &lritem_set.lr_items { - if item.dot_pos != item.rule.rhs.len() { - continue; - } - if let RuleElem::NonTerm(lhs) = &item.rule.lhs { - for la_token in &item.la_tokens { - if let RuleElem::Term(t) = la_token { - let id = lritem_set.id as usize; - let label = action_table[id].get_mut(t).unwrap(); - *label = LRAction::Reduce( - rule_tags[item.rule.id as usize], - *nonterm_table.get(lhs).unwrap(), - item.rule.rhs.len(), - ); - } - if let RuleElem::EOF = la_token { - let id = lritem_set.id as usize; - eof_action_table[id] = if lhs == "__top_dummy" { - LRAction::Accept - } else { - LRAction::Reduce( - rule_tags[item.rule.id as usize], - *nonterm_table.get(lhs).unwrap(), - item.rule.rhs.len(), - ) - }; - } - } - } - } - } - - Ok(LR1Configure { - action_table, - eof_action_table, - goto_table, - }) - } -} - -#[derive(Debug)] -struct LRItemDFA<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - sets: Vec>, - _phantom: PhantomData, -} - -impl<'a, T, R> LRItemDFA<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - fn r#gen( - init_set: LRItemSet<'a, T, R>, - ruleset: &'a RuleSet, - first_set: &HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> LRItemDFA<'a, T, R> { - let issue_id = |old_sets: &Vec>, set: &LRItemSet<'a, T, R>| { - if let Some(ex_set) = old_sets.iter().find(|&set0| set0.strict_eq(set)) { - Err(ex_set.id) - } else { - Ok(old_sets.len() as i32) - } - }; - - // "Expand a closure" <--> "Generate next nodes" loop - let mut loop_idx = (0, 1); - let mut lritem_sets = vec![init_set]; - while loop_idx.0 != loop_idx.1 { - let mut new_found_cnt = 0; - for idx in loop_idx.0..loop_idx.1 { - let next_sets = lritem_sets[idx].gen_next_sets(ruleset, first_set); - for (bef_token, mut next_set) in next_sets { - match issue_id(&lritem_sets, &next_set) { - Ok(id) => { - next_set.id = id; - lritem_sets[idx].next.insert(bef_token, id); - lritem_sets.push(next_set); - new_found_cnt += 1; - } - Err(id) => { - lritem_sets[idx].next.insert(bef_token, id); - } - } - } - } - loop_idx = (loop_idx.1, loop_idx.1 + new_found_cnt); - } - - LRItemDFA { - sets: lritem_sets, - _phantom: PhantomData, - } - } -} - -#[derive(Clone, Debug, Eq)] -struct LRItemSet<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - id: i32, - next: HashMap<&'a RuleElem, i32>, - lr_items: HashSet>, -} - -impl<'a, T, R> PartialEq for 
LRItemSet<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - fn eq(&self, other: &LRItemSet<'a, T, R>) -> bool { - self.lr_items == other.lr_items - } -} - -impl<'a, T, R> PartialEq>> for LRItemSet<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - fn eq(&self, other: &HashSet>) -> bool { - &self.lr_items == other - } -} - -impl<'a, T, R> LRItemSet<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - fn new(id: i32, lr_items: HashSet>) -> Self { - LRItemSet { - id, - next: HashMap::new(), - lr_items, - } - } - - fn strict_eq(&self, other: &Self) -> bool { - if self.lr_items.len() != other.lr_items.len() { - return false; - } - self.lr_items - .iter() - .all(|item| other.lr_items.iter().any(|item_b| item_b.strict_eq(item))) - } - - fn expand_closure<'b>( - mut self, - ruleset: &'a RuleSet, - first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> LRItemSet<'a, T, R> { - let mut lr_items = self.lr_items.clone(); - let mut lr_items_fetched = self.lr_items; - loop { - let new_items: Vec> = lr_items_fetched - .iter() - .flat_map(|item| item.expand_closure(ruleset, first_set)) - .collect(); - let new_items = LRItem::<'_, _, _>::unify_all(new_items); - let new_items = HashSet::from_iter(new_items); - - let bef_len = lr_items.len(); - lr_items = LRItem::<'_, _, _>::unity_set(lr_items, new_items.clone()); - let af_len = lr_items.len(); - if bef_len == af_len { - break; - } - lr_items_fetched = new_items; - } - self.lr_items = lr_items; - - self - } - - fn gen_next_sets<'b>( - &self, - ruleset: &'a RuleSet, - first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> HashMap<&'a RuleElem, LRItemSet<'a, T, R>> { - let new_items: Vec<(&'a RuleElem, LRItem<'a, T, R>)> = self - .lr_items - .iter() - .filter_map(|lr_item| lr_item.next_dot()) - .collect(); - - let mut new_sets: HashMap<&RuleElem, HashSet>> = HashMap::new(); - for (bef_token, lr_item) in new_items { - if new_sets.get(&bef_token).is_none() { - new_sets.insert(bef_token, HashSet::new()); - } - new_sets.get_mut(&bef_token).unwrap().insert(lr_item); - } - - let mut new_sets_expanded: HashMap<&'a RuleElem, LRItemSet<'_, _, _>> = HashMap::new(); - for (ktoken, new_set) in new_sets { - let new_set = LRItemSet::new(0, new_set); - let new_set = new_set.expand_closure(ruleset, first_set); - new_sets_expanded.insert(ktoken, new_set); - } - - new_sets_expanded - } -} - -#[derive(Clone, Debug, Eq)] -struct LRItem<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - rule: &'a Rule, - dot_pos: usize, - la_tokens: HashSet<&'a RuleElem>, -} - -impl<'a, T, R> Hash for LRItem<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - fn hash(&self, state: &mut H) { - self.rule.hash(state); - self.dot_pos.hash(state); - } -} - -impl<'a, T, R> PartialEq for LRItem<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - fn eq(&self, other: &Self) -> bool { - self.rule == other.rule && self.dot_pos == other.dot_pos - } -} - -impl<'a, T, R> LRItem<'a, T, R> -where - T: TokenTag, - R: RuleTag, -{ - fn new(rule: &'a Rule, la_tokens: HashSet<&'a RuleElem>) -> LRItem<'a, T, R> { - LRItem { - rule, - dot_pos: 0, - la_tokens, - } - } - - fn strict_eq(&self, other: &Self) -> bool { - self.rule == other.rule - && self.dot_pos == other.dot_pos - && self.la_tokens == other.la_tokens - } - - fn expand_closure<'b>( - &self, - ruleset: &'a RuleSet, - first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> HashSet> { - let af_la_tokens = if self.dot_pos + 1 < self.rule.rhs.len() { - HashSet::from_iter( - first_set - .get(&self.rule.rhs[self.dot_pos + 1]) - .unwrap() - 
.clone(), - ) - } else { - self.la_tokens.clone() - }; - - if self.dot_pos < self.rule.rhs.len() - && matches!(self.rule.rhs[self.dot_pos], RuleElem::NonTerm(_)) - { - ruleset - .find_rule(&self.rule.rhs[self.dot_pos]) - .into_iter() - .map(|rule| LRItem::<'_, _, _>::new(rule, af_la_tokens.clone())) - .collect() - } else { - HashSet::new() - } - } - - #[allow(clippy::int_plus_one)] - fn next_dot(&self) -> Option<(&'a RuleElem, LRItem<'a, T, R>)> { - if self.dot_pos + 1 <= self.rule.rhs.len() { - let bef_token = &self.rule.rhs[self.dot_pos]; - let item = LRItem { - rule: self.rule, - dot_pos: self.dot_pos + 1, - la_tokens: self.la_tokens.clone(), - }; - Some((bef_token, item)) - } else { - None - } - } - - fn unify(&mut self, other: LRItem<'a, T, R>) { - if self != &other { - return; - } - other.la_tokens.into_iter().for_each(|la_token| { - if !self.la_tokens.contains(&la_token) { - self.la_tokens.insert(la_token); - } - }); - } - - fn unify_all(mut items: Vec>) -> Vec> { - for idx in (0..items.len()).permutations(2) { - let (a_idx, b_idx) = (idx[0], idx[1]); - let tmp = items[b_idx].clone(); - items[a_idx].unify(tmp); - } - items - } - - fn unity_set( - items_a: HashSet>, - items_b: HashSet>, - ) -> HashSet> { - let mut items_a = Vec::from_iter(items_a); - let items_b = Vec::from_iter(items_b); - items_a.extend(items_b); - HashSet::from_iter(Self::unify_all(items_a)) - } -} diff --git a/crates/parse_lr_lr1/src/error.rs b/crates/parse_lr_lr1/src/error.rs deleted file mode 100644 index 4cbb467..0000000 --- a/crates/parse_lr_lr1/src/error.rs +++ /dev/null @@ -1,23 +0,0 @@ -use thiserror::Error; - -use copager_core::error::ParseError as SuperParseError; -use copager_cfg::token::{TokenTag, Token}; - -#[derive(Debug, Error)] -pub enum ParseError { - #[error("Unexpected token {actual:?} found")] - UnexpectedToken { - actual: String, - }, - #[error("Unexpected EOF")] - UnexpectedEOF, -} - -impl ParseError { - pub fn new_unexpected_token(expected: Token) -> SuperParseError { - let err = ParseError::UnexpectedToken { - actual: format!("{:?}", expected.kind), - }; - SuperParseError::from(err).with(expected) - } -} diff --git a/crates/parse_lr_lr1/src/lib.rs b/crates/parse_lr_lr1/src/lib.rs index 0e1a754..6b0b37f 100644 --- a/crates/parse_lr_lr1/src/lib.rs +++ b/crates/parse_lr_lr1/src/lib.rs @@ -1,98 +1,114 @@ #![feature(gen_blocks)] -mod error; -mod builder; +use std::marker::PhantomData; -use std::collections::HashMap; - -use serde::{Serialize, Deserialize}; - -use copager_cfg::token::Token; +use copager_cfg::token::{Token, TokenTag}; +use copager_cfg::rule::{Rule, RuleElem, RuleTag}; use copager_lex::LexSource; -use copager_parse::{ParseSource, ParseDriver, ParseEvent}; -use copager_utils::cache::Cacheable; - -use builder::{LR1Configure, LRAction}; -use error::ParseError; +use copager_parse::{ParseDriver, ParseSource, ParseEvent}; +use copager_parse_common::rule::FirstSet; +use copager_parse_lr_common::lr1::item::LR1Item; +use copager_parse_lr_common::lr1::LR1DFA; +use copager_parse_lr_common::table::{LRAction, LRTable, LRTableBuilder}; +use copager_parse_lr_common::driver::LRDriver; -#[derive(Debug)] -pub struct LR1 +pub struct LR1 where - Sl: LexSource, - Sp: ParseSource, + T: TokenTag, + R: RuleTag { - tables: LR1Configure, + table: LRTable, } -impl Cacheable<(Sl, Sp)> for LR1 +impl ParseDriver for LR1 where Sl: LexSource, - Sl::Tag: Serialize + for<'de> Deserialize<'de>, Sp: ParseSource, - Sp::Tag: Serialize + for<'de> Deserialize<'de>, { - type Cache = LR1Configure; - - fn new((source_l, 
source_p): (Sl, Sp)) -> anyhow::Result { - Ok(LR1Configure::new(&source_l, &source_p)?) + fn try_from((_, source_p): (Sl, Sp)) -> anyhow::Result<Self> { + let table = LR1Table::try_from(source_p)?; + Ok(LR1 { table }) } - fn restore(tables: Self::Cache) -> Self { - LR1 { tables } + gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> + where + Il: Iterator<Item = Token<'input, Sl::Tag>>, + { + let mut driver = LRDriver::from(&self.table); + while !driver.accepted() { + for event in driver.consume(lexer.next()).collect::<Vec<_>>() { + yield event; + } + } } } -impl ParseDriver for LR1 +pub struct LR1Table<T, R> where - Sl: LexSource, - Sp: ParseSource, + T: TokenTag, + R: RuleTag<T> { - fn try_from((source_l, source_p): (Sl, Sp)) -> anyhow::Result { - let tables = LR1Configure::new(&source_l, &source_p)?; - Ok(LR1 { tables }) - } + _phantom_t: PhantomData<T>, + _phantom_r: PhantomData<R>, +} - gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> +impl<T, R> LR1Table<T, R> +where + T: TokenTag, + R: RuleTag<T>, +{ + fn try_from<Sp>(source_p: Sp) -> anyhow::Result<LRTable<T, R>> where - Il: Iterator>, + Sp: ParseSource, { - let mut stack = vec![0]; - loop { - let token = lexer.next(); - loop { - let top = stack[stack.len() - 1]; - let action = match token { - Some(token) => { - let local_action_table: &HashMap<_, _> = &self.tables.action_table[top]; - (local_action_table.get(&token.kind).unwrap(), Some(token)) - }, - None => (&self.tables.eof_action_table[top], None), - }; - match action { - (LRAction::Shift(new_state), Some(token)) => { - stack.push(*new_state); - yield ParseEvent::Read(token); - break; - } + // 最上位規則を追加して RuleSet を更新 + let mut ruleset = source_p.into_ruleset(); + let top_dummy = Rule::new( + None, + RuleElem::new_nonterm("__top_dummy"), + vec![RuleElem::new_nonterm(&ruleset.top)], + ); + ruleset.update_top(top_dummy.clone()); + + // First 集合作成 + let first_set = FirstSet::from(&ruleset); + + // LR(1) オートマトン作成 + let dfa = LR1DFA::from((&ruleset, &first_set)); + + // LR(1) 構文解析表作成 + let mut builder = LRTableBuilder::from(&dfa); + for node in &dfa.nodes { + let node = node.read().unwrap(); + for (rule, la_token) in node.find_all_by(is_lr1_reduce_state) { + // A -> α β . を含むノードに対して Reduce をマーク + match la_token { + RuleElem::Term(term) => { + builder.set(node.id, Some(*term), LRAction::Reduce(rule.clone())); } - (LRAction::Reduce(tag, goto, elems_cnt), _) => { - stack.truncate(stack.len() - elems_cnt); - stack.push(self.tables.goto_table[stack[stack.len() - 1]][*goto]); - yield ParseEvent::Parse { rule: *tag, len: *elems_cnt }; - } - (LRAction::Accept, _) => { - return; - } - (LRAction::None, Some(token)) => { - yield ParseEvent::Err(ParseError::new_unexpected_token(token).into()); - return; + RuleElem::EOF => { + builder.set(node.id, None, LRAction::Reduce(rule.clone())); } - (LRAction::None, None) => { - yield ParseEvent::Err(ParseError::UnexpectedEOF.into()); - return; + _ => {} + } + + // S -> Top . 
を含むノードに対して Accept をマーク + if let Some(_) = node.find_all(&top_dummy).next() { + builder.set(node.id, None, LRAction::Accept); + continue; } } } + let table = builder.build(); + + Ok(table) } } + +fn is_lr1_reduce_state(item: &&LR1Item) -> bool +where + T: TokenTag, + R: RuleTag, +{ + item.check_next_elem().is_none() +} diff --git a/crates/parse_lr_lr1/tests/simple.rs b/crates/parse_lr_lr1/tests/simple.rs index 393b935..4d94a5b 100644 --- a/crates/parse_lr_lr1/tests/simple.rs +++ b/crates/parse_lr_lr1/tests/simple.rs @@ -1,17 +1,14 @@ -use serde::{Serialize, Deserialize}; - +use copager_core::{Grammar, Processor}; use copager_cfg::token::TokenTag; use copager_cfg::rule::{RuleTag, Rule, RuleElem}; -use copager_lex::{LexSource, LexDriver}; +use copager_lex::LexSource; use copager_lex_regex::RegexLexer; -use copager_parse::{ParseSource, ParseDriver, ParseEvent}; +use copager_parse::ParseSource; use copager_parse_lr_lr1::LR1; +use copager_ir_void::Void; -#[derive( - Debug, Default, Copy, Clone, Hash, PartialEq, Eq, - LexSource, Serialize, Deserialize -)] -enum ExprToken { +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum TestToken { #[default] #[token(text = r"\+")] Plus, @@ -31,11 +28,8 @@ enum ExprToken { _Whitespace, } -#[derive( - Debug, Default, Copy, Clone, Hash, PartialEq, Eq, - ParseSource, Serialize, Deserialize -)] -enum ExprRule { +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum TestRule { #[default] #[rule(" ::= Plus ")] #[rule(" ::= Minus ")] @@ -50,56 +44,57 @@ enum ExprRule { Num, } -type MyLexer = RegexLexer; -type MyParser = LR1; - -const OK_INPUTS: [&str; 10] = [ - "10", - "10 + 20", - "10 - 20", - "10 * 20", - "10 / 20", - "10 + 20 * 30 - 40", - "(10)", - "((((10))))", - "10 * (20 - 30)", - "((10 + 20) * (30 / 40)) - 50", -]; - -const ERR_INPUTS: [&str; 7] = [ - "()", - "(10 -", - "10 +", - "*", - "10 20 + 30", - "10 + 20 * 30 / 40 (", - "(((10))", -]; +type TestGrammar = Grammar; +type TestLexer = RegexLexer; +type TestParser = LR1; +type TestProcessor = Processor; #[test] fn simple_success() { + const OK_INPUTS: [&str; 10] = [ + "10", + "10 + 20", + "10 - 20", + "10 * 20", + "10 / 20", + "10 + 20 * 30 - 40", + "(10)", + "((((10))))", + "10 * (20 - 30)", + "((10 + 20) * (30 / 40)) - 50", + ]; + + let processor = TestProcessor::new() + .build_lexer() + .unwrap() + .build_parser() + .unwrap(); + for input in &OK_INPUTS { - assert!(parse(input), "{}", input); + println!("input: {}", input); + processor.process::(input).unwrap(); } } #[test] fn simple_failure() { - for input in &ERR_INPUTS { - assert!(!parse(input), "{}", input); - } -} - -fn parse<'input>(input: &'input str) -> bool { - let source = ExprToken::default(); - let lexer = >::try_from(source).unwrap(); + const ERR_INPUTS: [&str; 7] = [ + "()", + "(10 -", + "10 +", + "*", + "10 20 + 30", + "10 + 20 * 30 / 40 (", + "(((10))", + ]; - let source = (ExprToken::default(), ExprRule::default()); - let parser = >::try_from(source).unwrap(); + let processor = TestProcessor::new() + .build_lexer() + .unwrap() + .build_parser() + .unwrap(); - let mut parse_itr = parser.run(lexer.run(input)); - let is_err = |state| matches!(state, ParseEvent::Err(_)); - let err_happened = parse_itr.any(is_err); - - !err_happened + for input in &ERR_INPUTS { + assert!(processor.process::(input).is_err(), "input: {}", input); + } } From 07ce4bae0cd3e852bb526620af30496e8b13bbbb Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 12:10:30 +0900 Subject: [PATCH 39/48] 
=?UTF-8?q?[add]=20Token,=20Rule,=20RuleElem=20?= =?UTF-8?q?=E3=81=AB=20Serialize,=20Deserialize=20=E3=82=92=E8=BC=89?= =?UTF-8?q?=E3=81=9B=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/cfg/Cargo.toml | 1 + crates/cfg/src/rule.rs | 14 ++++++++++++-- crates/cfg/src/token.rs | 4 +++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/crates/cfg/Cargo.toml b/crates/cfg/Cargo.toml index 220fec0..0514617 100644 --- a/crates/cfg/Cargo.toml +++ b/crates/cfg/Cargo.toml @@ -6,3 +6,4 @@ edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true, features = ["derive"] } diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index bcddb4e..798e6ba 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -2,6 +2,8 @@ use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Debug}; use std::hash::Hash; +use serde::{Serialize, Deserialize}; + use crate::token::TokenTag; pub trait RuleTag @@ -11,12 +13,16 @@ where fn as_rules(&self) -> Vec>; } -#[derive(Debug, Clone, Eq)] +#[derive(Debug, Clone, Eq, Serialize, Deserialize)] pub struct Rule where T: TokenTag, R: RuleTag, { + #[serde(bound( + serialize = "T: Serialize, R: Serialize", + deserialize = "T: Deserialize<'de>, R: Deserialize<'de>", + ))] pub id: usize, pub tag: Option, pub lhs: RuleElem, @@ -73,8 +79,12 @@ where } } -#[derive(Clone, Hash, Eq)] +#[derive(Clone, Hash, Eq, Serialize, Deserialize)] pub enum RuleElem { + #[serde(bound( + serialize = "T: Serialize", + deserialize = "T: Deserialize<'de>", + ))] NonTerm(String), Term(T), Epsilon, diff --git a/crates/cfg/src/token.rs b/crates/cfg/src/token.rs index 1469f80..8e47435 100644 --- a/crates/cfg/src/token.rs +++ b/crates/cfg/src/token.rs @@ -1,6 +1,8 @@ use std::fmt::Debug; use std::hash::Hash; +use serde::{Serialize, Deserialize}; + pub trait TokenTag where Self: Debug + Copy + Clone + Hash + Eq, @@ -8,7 +10,7 @@ where fn as_str<'a, 'b>(&'a self) -> &'b str; } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] pub struct Token<'input, T: TokenTag> { pub kind: T, pub src: &'input str, From daa1890d0bfef2922cf9ddd572afabe9d41bcdee Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 12:11:13 +0900 Subject: [PATCH 40/48] =?UTF-8?q?[clean]=20RuleSet::{first,=20null}=20?= =?UTF-8?q?=E3=82=92=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/cfg/src/rule.rs | 83 +----------------------------------------- 1 file changed, 1 insertion(+), 82 deletions(-) diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 798e6ba..065fe71 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use std::fmt::{Display, Debug}; use std::hash::Hash; @@ -184,85 +184,4 @@ where .filter(|rule| &rule.lhs == target) .collect() } - - pub fn first_set<'a>(&'a self) -> HashMap<&'a RuleElem, Vec<&'a RuleElem>> { - // 1. Calc a null set - let nulls_set = self.nulls_set(); - - // 2. Initialize a first set - let mut first_set: HashMap<&RuleElem, Vec<&RuleElem>> = HashMap::new(); - first_set.insert(&RuleElem::EOF, vec![&RuleElem::EOF]); - self.terms().into_iter().for_each(|relem| { - first_set.insert(relem, vec![relem]); - }); - self.nonterms().into_iter().for_each(|relem| { - first_set.insert(relem, vec![]); - }); - - // 3. 
List up candidates from a nonterm set - let mut candidates = vec![]; - for nonterm in self.nonterms() { - let rules = self.find_rule(nonterm); - for rule in rules { - for relem in &rule.rhs { - if &rule.lhs != relem { - candidates.push((nonterm, relem)) - } - if !nulls_set.contains(&relem) { - break; - } - } - } - } - - // 4. Find first set with recursive - let mut updated = true; - while updated { - updated = false; - for (nonterm, candidate) in &candidates { - let found_elems: Vec<&RuleElem> = first_set - .get(candidate) - .unwrap() - .iter() - .filter(|relem| !first_set.get(nonterm).unwrap().contains(relem)) - .copied() - .collect(); - updated = !found_elems.is_empty(); - first_set - .get_mut(nonterm) - .unwrap() - .extend(found_elems.into_iter()); - } - } - - first_set - } - - fn nulls_set<'a>(&'a self) -> Vec<&'a RuleElem> { - // 1. Find null rules - let mut nulls_set: Vec<&RuleElem> = self - .rules - .iter() - .filter(|rule| rule.rhs.is_empty()) - .map(|rule| &rule.lhs) - .collect(); - - // 2. Find null rules with recursive - let mut updated = true; - while updated { - updated = false; - for rule in &self.rules { - if nulls_set.contains(&&rule.lhs) { - continue; - } else if rule.rhs.iter().all(|relem| nulls_set.contains(&relem)) { - nulls_set.push(&rule.lhs); - updated = true; - } else { - continue; - } - } - } - - nulls_set - } } From 651d6ec757b5c5ed712d6dd43df4d77c4a154172 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 12:15:39 +0900 Subject: [PATCH 41/48] =?UTF-8?q?[add]=20LR0,=20LR1,=20SLR1=20=E3=81=AB=20?= =?UTF-8?q?Cacheable=20=E3=82=92=E8=BC=89=E3=81=9B=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 8 ++++++++ crates/parse_lr_common/Cargo.toml | 1 + crates/parse_lr_common/src/table.rs | 6 ++++-- crates/parse_lr_lr0/Cargo.toml | 2 ++ crates/parse_lr_lr0/src/lib.rs | 23 +++++++++++++++++++++++ crates/parse_lr_lr1/Cargo.toml | 2 ++ crates/parse_lr_lr1/src/lib.rs | 22 ++++++++++++++++++++++ crates/parse_lr_slr1/Cargo.toml | 2 ++ crates/parse_lr_slr1/src/lib.rs | 22 ++++++++++++++++++++++ 9 files changed, 86 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c169b9..0cb4cfe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,6 +50,7 @@ name = "copager_cfg" version = "0.2.0" dependencies = [ "anyhow", + "serde", "thiserror", ] @@ -204,6 +205,7 @@ dependencies = [ "copager_cfg", "copager_parse", "copager_parse_common", + "serde", "thiserror", ] @@ -219,6 +221,8 @@ dependencies = [ "copager_lex_regex", "copager_parse", "copager_parse_lr_common", + "copager_utils", + "serde", "thiserror", ] @@ -235,6 +239,8 @@ dependencies = [ "copager_parse", "copager_parse_common", "copager_parse_lr_common", + "copager_utils", + "serde", "thiserror", ] @@ -251,6 +257,8 @@ dependencies = [ "copager_parse", "copager_parse_common", "copager_parse_lr_common", + "copager_utils", + "serde", "thiserror", ] diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml index c567405..4de59f4 100644 --- a/crates/parse_lr_common/Cargo.toml +++ b/crates/parse_lr_common/Cargo.toml @@ -8,6 +8,7 @@ edition = "2024" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true, features = ["derive"] } copager_cfg = { path = "../cfg" } copager_parse = { path = "../parse" } copager_parse_common = { path = "../parse_common" } diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs index d27b63d..87db904 100644 --- 
a/crates/parse_lr_common/src/table.rs +++ b/crates/parse_lr_common/src/table.rs @@ -1,11 +1,13 @@ use std::collections::HashMap; +use serde::{Serialize, Deserialize}; + use copager_cfg::token::{Token, TokenTag}; use copager_cfg::rule::{Rule, RuleElem, RuleTag}; use crate::automaton::Automaton; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub enum LRAction where T: TokenTag, @@ -17,7 +19,7 @@ where None, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct LRTable where T: TokenTag, diff --git a/crates/parse_lr_lr0/Cargo.toml b/crates/parse_lr_lr0/Cargo.toml index a422068..4279751 100644 --- a/crates/parse_lr_lr0/Cargo.toml +++ b/crates/parse_lr_lr0/Cargo.toml @@ -8,10 +8,12 @@ edition = "2024" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true, features = ["derive"] } copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } copager_parse_lr_common = { path = "../parse_lr_common" } +copager_utils = { path = "../utils" } [dev-dependencies] copager_core = { path = "../core" } diff --git a/crates/parse_lr_lr0/src/lib.rs b/crates/parse_lr_lr0/src/lib.rs index e178e51..c7105f1 100644 --- a/crates/parse_lr_lr0/src/lib.rs +++ b/crates/parse_lr_lr0/src/lib.rs @@ -2,6 +2,8 @@ use std::marker::PhantomData; +use serde::{Serialize, Deserialize}; + use copager_cfg::token::{Token, TokenTag}; use copager_cfg::rule::{Rule, RuleElem, RuleTag}; use copager_lex::LexSource; @@ -10,6 +12,7 @@ use copager_parse_lr_common::lr0::item::LR0Item; use copager_parse_lr_common::lr0::LR0DFA; use copager_parse_lr_common::table::{LRAction, LRTable, LRTableBuilder}; use copager_parse_lr_common::driver::LRDriver; +use copager_utils::cache::Cacheable; pub struct LR0 where @@ -42,6 +45,26 @@ where } } +impl Cacheable<(Sl, Sp)> for LR0 +where + Sl: LexSource, + Sl::Tag: Serialize + for<'de> Deserialize<'de>, + Sp: ParseSource, + Sp::Tag: Serialize + for<'de> Deserialize<'de>, +{ + type Cache = LRTable; + + fn new((source_l, source_p): (Sl, Sp)) -> anyhow::Result { + let table = LR0Table::try_from(source_l, source_p)?; + Ok(table) + } + + fn restore(table: Self::Cache) -> Self { + LR0 { table } + } +} + +#[derive(Debug, Serialize, Deserialize)] pub struct LR0Table where T: TokenTag, diff --git a/crates/parse_lr_lr1/Cargo.toml b/crates/parse_lr_lr1/Cargo.toml index 8698ac6..287b880 100644 --- a/crates/parse_lr_lr1/Cargo.toml +++ b/crates/parse_lr_lr1/Cargo.toml @@ -8,11 +8,13 @@ edition = "2024" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true, features = ["derive"] } copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } copager_parse_common = { path = "../parse_common" } copager_parse_lr_common = { path = "../parse_lr_common" } +copager_utils = { path = "../utils" } [dev-dependencies] copager_core = { path = "../core" } diff --git a/crates/parse_lr_lr1/src/lib.rs b/crates/parse_lr_lr1/src/lib.rs index 6b0b37f..a3838dc 100644 --- a/crates/parse_lr_lr1/src/lib.rs +++ b/crates/parse_lr_lr1/src/lib.rs @@ -2,6 +2,8 @@ use std::marker::PhantomData; +use serde::{Serialize, Deserialize}; + use copager_cfg::token::{Token, TokenTag}; use copager_cfg::rule::{Rule, RuleElem, RuleTag}; use copager_lex::LexSource; @@ -11,6 +13,7 @@ use copager_parse_lr_common::lr1::item::LR1Item; use copager_parse_lr_common::lr1::LR1DFA; use copager_parse_lr_common::table::{LRAction, 
LRTable, LRTableBuilder}; use copager_parse_lr_common::driver::LRDriver; +use copager_utils::cache::Cacheable; pub struct LR1 where @@ -43,6 +46,25 @@ where } } +impl Cacheable<(Sl, Sp)> for LR1 +where + Sl: LexSource, + Sl::Tag: Serialize + for<'de> Deserialize<'de>, + Sp: ParseSource, + Sp::Tag: Serialize + for<'de> Deserialize<'de>, +{ + type Cache = LRTable; + + fn new((_, source_p): (Sl, Sp)) -> anyhow::Result { + let table = LR1Table::try_from(source_p)?; + Ok(table) + } + + fn restore(table: Self::Cache) -> Self { + LR1 { table } + } +} + pub struct LR1Table where T: TokenTag, diff --git a/crates/parse_lr_slr1/Cargo.toml b/crates/parse_lr_slr1/Cargo.toml index c402c4f..e6a1254 100644 --- a/crates/parse_lr_slr1/Cargo.toml +++ b/crates/parse_lr_slr1/Cargo.toml @@ -8,11 +8,13 @@ edition = "2024" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true, features = ["derive"] } copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } copager_parse_common = { path = "../parse_common" } copager_parse_lr_common = { path = "../parse_lr_common" } +copager_utils = { path = "../utils" } [dev-dependencies] copager_core = { path = "../core" } diff --git a/crates/parse_lr_slr1/src/lib.rs b/crates/parse_lr_slr1/src/lib.rs index 8d7493f..6088228 100644 --- a/crates/parse_lr_slr1/src/lib.rs +++ b/crates/parse_lr_slr1/src/lib.rs @@ -2,6 +2,8 @@ use std::marker::PhantomData; +use serde::{Serialize, Deserialize}; + use copager_cfg::token::{Token, TokenTag}; use copager_cfg::rule::{Rule, RuleElem, RuleTag}; use copager_lex::LexSource; @@ -11,6 +13,7 @@ use copager_parse_lr_common::lr0::item::LR0Item; use copager_parse_lr_common::lr0::LR0DFA; use copager_parse_lr_common::table::{LRAction, LRTable, LRTableBuilder}; use copager_parse_lr_common::driver::LRDriver; +use copager_utils::cache::Cacheable; pub struct SLR1 where @@ -43,6 +46,25 @@ where } } +impl Cacheable<(Sl, Sp)> for SLR1 +where + Sl: LexSource, + Sl::Tag: Serialize + for<'de> Deserialize<'de>, + Sp: ParseSource, + Sp::Tag: Serialize + for<'de> Deserialize<'de>, +{ + type Cache = LRTable; + + fn new((_, source_p): (Sl, Sp)) -> anyhow::Result { + let table = SLR1Table::try_from(source_p)?; + Ok(table) + } + + fn restore(table: Self::Cache) -> Self { + SLR1 { table } + } +} + pub struct SLR1Table where T: TokenTag, From 64c54df2dfac32110e1265ff98f797394acca019 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 12:40:23 +0900 Subject: [PATCH 42/48] =?UTF-8?q?[update]=20ir=5Fsexp=20=E3=81=AE=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=B3=E3=83=BC=E3=83=89=E3=81=AE=E4=B8=80?= =?UTF-8?q?=E9=83=A8=E3=82=92=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 + crates/ir_sexp/Cargo.toml | 1 + crates/ir_sexp/tests/simple.rs | 39 ++++++++++------------------------ 3 files changed, 13 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0cb4cfe..45202cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -101,6 +101,7 @@ version = "0.2.0" dependencies = [ "anyhow", "copager_cfg", + "copager_core", "copager_ir", "copager_ir_sexp", "copager_lex", diff --git a/crates/ir_sexp/Cargo.toml b/crates/ir_sexp/Cargo.toml index 72e5cef..a3c4ec8 100644 --- a/crates/ir_sexp/Cargo.toml +++ b/crates/ir_sexp/Cargo.toml @@ -12,6 +12,7 @@ copager_parse = { path = "../parse" } copager_ir = { path = "../ir" } [dev-dependencies] +copager_core = { path = "../core" } copager_lex 
= { path = "../lex", features = ["derive"] } copager_lex_regex = { path = "../lex_regex" } copager_parse = { path = "../parse", features = ["derive"] } diff --git a/crates/ir_sexp/tests/simple.rs b/crates/ir_sexp/tests/simple.rs index 126e686..31e983b 100644 --- a/crates/ir_sexp/tests/simple.rs +++ b/crates/ir_sexp/tests/simple.rs @@ -1,10 +1,10 @@ +use copager_core::{Grammar, Processor}; use copager_cfg::token::TokenTag; use copager_cfg::rule::{RuleTag, Rule, RuleElem}; -use copager_lex::{LexSource, LexDriver}; +use copager_lex::LexSource; use copager_lex_regex::RegexLexer; -use copager_parse::{ParseSource, ParseDriver, ParseEvent}; +use copager_parse::ParseSource; use copager_parse_lr_lr1::LR1; -use copager_ir::{IR, IRBuilder}; use copager_ir_sexp::SExp; #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] @@ -44,10 +44,6 @@ enum ExprRule { Num, } -type MyLexer = RegexLexer; -type MyParser = LR1; -type MyIR = SExp<'static, ExprToken, ExprRule>; - #[test] fn simple_display() { let ir = parse("1"); @@ -68,28 +64,15 @@ fn simple_eval() { } fn parse<'input>(input: &'input str) -> anyhow::Result> { - let source = ExprToken::default(); - let lexer = >::try_from(source).unwrap(); - - let source = (ExprToken::default(), ExprRule::default()); - let parser = >::try_from(source).unwrap(); - - let mut ir_builder = >::Builder::new(); - for event in parser.run(lexer.run(input)) { - match event { - ParseEvent::Read(token) => { - ir_builder.on_read(token).unwrap(); - } - ParseEvent::Parse { rule, len } => { - ir_builder.on_parse(rule, len).unwrap(); - } - ParseEvent::Err(err) => { - return Err(anyhow::anyhow!("{:?}", err)); - } - } - } + type TestLang = Grammar; + type TestLexer = RegexLexer; + type TestParser = LR1; + type TestProcessor = Processor; - ir_builder.build() + TestProcessor::new() + .build_lexer()? + .build_parser()? 
+ .process::>(input) } fn eval(ir: &SExp<'static, ExprToken, ExprRule>) -> i32 { From e73bb2104f4cd3e82aa18e1bd9c335b8a0445834 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 13:29:33 +0900 Subject: [PATCH 43/48] =?UTF-8?q?[update]=20parse=5Flr=5Fcommon=20?= =?UTF-8?q?=E3=81=8C=E5=85=AC=E9=96=8B=E3=81=99=E3=82=8B=E3=83=A2=E3=82=B8?= =?UTF-8?q?=E3=83=A5=E3=83=BC=E3=83=AB=E6=A7=8B=E9=80=A0=E3=82=92=E6=9B=B4?= =?UTF-8?q?=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/lib.rs | 7 +++++-- crates/parse_lr_lr0/src/lib.rs | 3 +-- crates/parse_lr_lr1/src/lib.rs | 3 +-- crates/parse_lr_slr1/src/lib.rs | 3 +-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs index c512803..56d1636 100644 --- a/crates/parse_lr_common/src/lib.rs +++ b/crates/parse_lr_common/src/lib.rs @@ -1,7 +1,10 @@ #![feature(gen_blocks)] mod automaton; -pub mod table; -pub mod driver; +mod table; +mod driver; pub mod lr0; pub mod lr1; + +pub use table::{LRAction, LRTable, LRTableBuilder}; +pub use driver::LRDriver; diff --git a/crates/parse_lr_lr0/src/lib.rs b/crates/parse_lr_lr0/src/lib.rs index c7105f1..e45d2a7 100644 --- a/crates/parse_lr_lr0/src/lib.rs +++ b/crates/parse_lr_lr0/src/lib.rs @@ -10,8 +10,7 @@ use copager_lex::LexSource; use copager_parse::{ParseDriver, ParseSource, ParseEvent}; use copager_parse_lr_common::lr0::item::LR0Item; use copager_parse_lr_common::lr0::LR0DFA; -use copager_parse_lr_common::table::{LRAction, LRTable, LRTableBuilder}; -use copager_parse_lr_common::driver::LRDriver; +use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder}; use copager_utils::cache::Cacheable; pub struct LR0 diff --git a/crates/parse_lr_lr1/src/lib.rs b/crates/parse_lr_lr1/src/lib.rs index a3838dc..03429cb 100644 --- a/crates/parse_lr_lr1/src/lib.rs +++ b/crates/parse_lr_lr1/src/lib.rs @@ -11,8 +11,7 @@ use copager_parse::{ParseDriver, ParseSource, ParseEvent}; use copager_parse_common::rule::FirstSet; use copager_parse_lr_common::lr1::item::LR1Item; use copager_parse_lr_common::lr1::LR1DFA; -use copager_parse_lr_common::table::{LRAction, LRTable, LRTableBuilder}; -use copager_parse_lr_common::driver::LRDriver; +use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder}; use copager_utils::cache::Cacheable; pub struct LR1 diff --git a/crates/parse_lr_slr1/src/lib.rs b/crates/parse_lr_slr1/src/lib.rs index 6088228..3aa1189 100644 --- a/crates/parse_lr_slr1/src/lib.rs +++ b/crates/parse_lr_slr1/src/lib.rs @@ -11,8 +11,7 @@ use copager_parse::{ParseDriver, ParseSource, ParseEvent}; use copager_parse_common::rule::FollowSet; use copager_parse_lr_common::lr0::item::LR0Item; use copager_parse_lr_common::lr0::LR0DFA; -use copager_parse_lr_common::table::{LRAction, LRTable, LRTableBuilder}; -use copager_parse_lr_common::driver::LRDriver; +use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder}; use copager_utils::cache::Cacheable; pub struct SLR1 From 4ac742f11e8ae40f99bfb0049e57497dd21600e1 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 13:39:04 +0900 Subject: [PATCH 44/48] =?UTF-8?q?[update]=20parse=5Flr=5Fcommon=20?= =?UTF-8?q?=E3=81=8C=E5=85=AC=E9=96=8B=E3=81=99=E3=82=8B=E3=83=A2=E3=82=B8?= =?UTF-8?q?=E3=83=A5=E3=83=BC=E3=83=AB=E6=A7=8B=E9=80=A0=E3=82=92=E6=9B=B4?= =?UTF-8?q?=E6=96=B0=202?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit --- crates/parse_lr_common/src/automaton.rs | 3 +++ crates/parse_lr_common/src/{ => automaton}/lr0.rs | 0 crates/parse_lr_common/src/{ => automaton}/lr0/dfa.rs | 0 crates/parse_lr_common/src/{ => automaton}/lr0/item.rs | 0 crates/parse_lr_common/src/{ => automaton}/lr1.rs | 0 crates/parse_lr_common/src/{ => automaton}/lr1/dfa.rs | 0 crates/parse_lr_common/src/{ => automaton}/lr1/item.rs | 0 crates/parse_lr_common/src/lib.rs | 7 +++++-- 8 files changed, 8 insertions(+), 2 deletions(-) rename crates/parse_lr_common/src/{ => automaton}/lr0.rs (100%) rename crates/parse_lr_common/src/{ => automaton}/lr0/dfa.rs (100%) rename crates/parse_lr_common/src/{ => automaton}/lr0/item.rs (100%) rename crates/parse_lr_common/src/{ => automaton}/lr1.rs (100%) rename crates/parse_lr_common/src/{ => automaton}/lr1/dfa.rs (100%) rename crates/parse_lr_common/src/{ => automaton}/lr1/item.rs (100%) diff --git a/crates/parse_lr_common/src/automaton.rs b/crates/parse_lr_common/src/automaton.rs index 8a8ae8f..f05d586 100644 --- a/crates/parse_lr_common/src/automaton.rs +++ b/crates/parse_lr_common/src/automaton.rs @@ -1,6 +1,9 @@ use copager_cfg::token::TokenTag; use copager_cfg::rule::RuleElem; +pub mod lr0; +pub mod lr1; + pub trait Automaton<'a: 'b, 'b, T: TokenTag + 'a> { fn len(&self) -> usize; fn edges(&'b self) -> impl Iterator)>; diff --git a/crates/parse_lr_common/src/lr0.rs b/crates/parse_lr_common/src/automaton/lr0.rs similarity index 100% rename from crates/parse_lr_common/src/lr0.rs rename to crates/parse_lr_common/src/automaton/lr0.rs diff --git a/crates/parse_lr_common/src/lr0/dfa.rs b/crates/parse_lr_common/src/automaton/lr0/dfa.rs similarity index 100% rename from crates/parse_lr_common/src/lr0/dfa.rs rename to crates/parse_lr_common/src/automaton/lr0/dfa.rs diff --git a/crates/parse_lr_common/src/lr0/item.rs b/crates/parse_lr_common/src/automaton/lr0/item.rs similarity index 100% rename from crates/parse_lr_common/src/lr0/item.rs rename to crates/parse_lr_common/src/automaton/lr0/item.rs diff --git a/crates/parse_lr_common/src/lr1.rs b/crates/parse_lr_common/src/automaton/lr1.rs similarity index 100% rename from crates/parse_lr_common/src/lr1.rs rename to crates/parse_lr_common/src/automaton/lr1.rs diff --git a/crates/parse_lr_common/src/lr1/dfa.rs b/crates/parse_lr_common/src/automaton/lr1/dfa.rs similarity index 100% rename from crates/parse_lr_common/src/lr1/dfa.rs rename to crates/parse_lr_common/src/automaton/lr1/dfa.rs diff --git a/crates/parse_lr_common/src/lr1/item.rs b/crates/parse_lr_common/src/automaton/lr1/item.rs similarity index 100% rename from crates/parse_lr_common/src/lr1/item.rs rename to crates/parse_lr_common/src/automaton/lr1/item.rs diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs index 56d1636..5843aa9 100644 --- a/crates/parse_lr_common/src/lib.rs +++ b/crates/parse_lr_common/src/lib.rs @@ -3,8 +3,11 @@ mod automaton; mod table; mod driver; -pub mod lr0; -pub mod lr1; +// LR 共通部品 pub use table::{LRAction, LRTable, LRTableBuilder}; pub use driver::LRDriver; + +// LR オートマトン +pub use automaton::lr0; +pub use automaton::lr1; From 4ac8f5403d00ebafecf36f4c5d4fecdf26ab02b4 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 13:42:49 +0900 Subject: [PATCH 45/48] =?UTF-8?q?[add]=20LRError=20=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 + crates/parse_lr_common/Cargo.toml | 1 + crates/parse_lr_common/src/driver.rs | 9 +++------ 
crates/parse_lr_common/src/error.rs | 27 +++++++++++++++++++++++++++ crates/parse_lr_common/src/lib.rs | 3 ++- 5 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 crates/parse_lr_common/src/error.rs diff --git a/Cargo.lock b/Cargo.lock index 45202cb..2067388 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -204,6 +204,7 @@ version = "0.2.0" dependencies = [ "anyhow", "copager_cfg", + "copager_core", "copager_parse", "copager_parse_common", "serde", diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml index 4de59f4..ecc8628 100644 --- a/crates/parse_lr_common/Cargo.toml +++ b/crates/parse_lr_common/Cargo.toml @@ -9,6 +9,7 @@ edition = "2024" anyhow = { workspace = true } thiserror = { workspace = true } serde = { workspace = true, features = ["derive"] } +copager_core = { path = "../core" } copager_cfg = { path = "../cfg" } copager_parse = { path = "../parse" } copager_parse_common = { path = "../parse_common" } diff --git a/crates/parse_lr_common/src/driver.rs b/crates/parse_lr_common/src/driver.rs index 56accb0..48c8a1f 100644 --- a/crates/parse_lr_common/src/driver.rs +++ b/crates/parse_lr_common/src/driver.rs @@ -2,6 +2,7 @@ use copager_cfg::token::{TokenTag, Token}; use copager_cfg::rule::{RuleElem, RuleTag}; use copager_parse::ParseEvent; +use crate::error::LRError; use crate::table::{LRAction, LRTable}; pub struct LRDriver<'table, T, R> @@ -60,15 +61,11 @@ where return; } (LRAction::None, Some(token)) => { - // TODO - // yield ParseEvent::Err(ParseError::new_unexpected_token(token).into()); - yield ParseEvent::Err(anyhow::anyhow!("unexpected token {}", token.as_str()).into()); + yield ParseEvent::Err(LRError::new_unexpected_token(token).into()); return; } (LRAction::None, None) => { - // TODO - // yield ParseEvent::Err(ParseError::UnexpectedEOF.into()); - yield ParseEvent::Err(anyhow::anyhow!("unexpected EOF").into()); + yield ParseEvent::Err(LRError::new_unexpected_eof().into()); return; } _ => unreachable!(), diff --git a/crates/parse_lr_common/src/error.rs b/crates/parse_lr_common/src/error.rs new file mode 100644 index 0000000..6b07694 --- /dev/null +++ b/crates/parse_lr_common/src/error.rs @@ -0,0 +1,27 @@ +use thiserror::Error; + +use copager_core::error::ParseError; +use copager_cfg::token::{TokenTag, Token}; + +#[derive(Debug, Error)] +pub enum LRError { + #[error("Unexpected token {actual:?} found")] + UnexpectedToken { + actual: String, + }, + #[error("Unexpected EOF")] + UnexpectedEOF, +} + +impl LRError { + pub fn new_unexpected_token(expected: Token) -> ParseError { + let err = LRError::UnexpectedToken { + actual: format!("{:?}", expected.kind), + }; + ParseError::from(err).with(expected) + } + + pub fn new_unexpected_eof() -> ParseError { + ParseError::from(LRError::UnexpectedEOF) + } +} diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs index 5843aa9..36137d4 100644 --- a/crates/parse_lr_common/src/lib.rs +++ b/crates/parse_lr_common/src/lib.rs @@ -1,8 +1,9 @@ #![feature(gen_blocks)] mod automaton; -mod table; +mod error; mod driver; +mod table; // LR 共通部品 pub use table::{LRAction, LRTable, LRTableBuilder}; From a4aba2f6b4fe93c4dd2526dc25eb0c0c364175eb Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 14:09:51 +0900 Subject: [PATCH 46/48] =?UTF-8?q?[add]=20Rule=20=E3=81=AB=20Display?= =?UTF-8?q?=EF=BC=8CDebug=20=E3=82=92=E8=BC=89=E3=81=9B=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/cfg/src/rule.rs | 26 
+++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 065fe71..156bef0 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -13,7 +13,7 @@ where fn as_rules(&self) -> Vec<Rule<T, Self>>; } -#[derive(Debug, Clone, Eq, Serialize, Deserialize)] +#[derive(Clone, Eq, Serialize, Deserialize)] pub struct Rule<T, R> where T: TokenTag, R: RuleTag<T>, @@ -29,6 +29,30 @@ where pub rhs: Vec<RuleElem<T>>, } +impl<T, R> Display for Rule<T, R> +where + T: TokenTag, + R: RuleTag<T>, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} ->", self.lhs)?; + for elem in &self.rhs { + write!(f, " {}", elem)?; + } + Ok(()) + } +} + +impl<T, R> Debug for Rule<T, R> +where + T: TokenTag, + R: RuleTag<T>, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} ({})", self, self.id) + } +} + impl<T, R> PartialEq for Rule<T, R> where T: TokenTag, From d3422556c8297e8bcd4808505784f489c2d96c31 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 14:12:33 +0900 Subject: [PATCH 47/48] =?UTF-8?q?[add]=20=E6=A7=8B=E6=96=87=E8=A7=A3?= =?UTF-8?q?=E6=9E=90=E8=A1=A8=E6=A7=8B=E7=AF=89=E6=99=82=E3=81=AE=E3=82=B3?= =?UTF-8?q?=E3=83=B3=E3=83=95=E3=83=AA=E3=82=AF=E3=83=88=E3=82=92=E3=82=A8?= =?UTF-8?q?=E3=83=A9=E3=83=BC=E3=81=A8=E3=81=97=E3=81=A6=E5=90=90=E3=81=8D?= =?UTF-8?q?=E5=87=BA=E3=81=99=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/error.rs | 21 +++++++++++++++++++++ crates/parse_lr_common/src/table.rs | 14 +++++++++++++- crates/parse_lr_lr0/src/lib.rs | 6 +++--- crates/parse_lr_lr1/src/lib.rs | 6 +++--- crates/parse_lr_slr1/src/lib.rs | 6 +++--- 5 files changed, 43 insertions(+), 10 deletions(-) diff --git a/crates/parse_lr_common/src/error.rs b/crates/parse_lr_common/src/error.rs index 6b07694..ec8f0ba 100644 --- a/crates/parse_lr_common/src/error.rs +++ b/crates/parse_lr_common/src/error.rs @@ -2,9 +2,16 @@ use thiserror::Error; use copager_core::error::ParseError; use copager_cfg::token::{TokenTag, Token}; +use copager_cfg::rule::RuleTag; + +use crate::table::LRAction; #[derive(Debug, Error)] pub enum LRError { + #[error("Conflict occurred at [{action}]")] + Conflict { + action: String, + }, #[error("Unexpected token {actual:?} found")] UnexpectedToken { actual: String, @@ -14,6 +21,20 @@ pub enum LRError { } impl LRError { + pub fn new_conflict<T, R>(action: &LRAction<T, R>) -> ParseError + where + T: TokenTag, + R: RuleTag<T>, + { + let action = match action { + LRAction::Shift(state) => format!("Shift({})", state), + LRAction::Reduce(rule) => format!("Reduce({})", rule), + LRAction::Accept => "Accept".to_string(), + _ => unimplemented!(), + }; + ParseError::from(LRError::Conflict { action }) + } + pub fn new_unexpected_token<T: TokenTag>(expected: Token<T>) -> ParseError { let err = LRError::UnexpectedToken { actual: format!("{:?}", expected.kind), diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs index 87db904..583f6a8 100644 --- a/crates/parse_lr_common/src/table.rs +++ b/crates/parse_lr_common/src/table.rs @@ -6,8 +6,9 @@ use copager_cfg::token::{Token, TokenTag}; use copager_cfg::rule::{Rule, RuleElem, RuleTag}; use crate::automaton::Automaton; +use crate::error::LRError; -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum LRAction<T, R> where T: TokenTag, @@ -98,11 +99,22 @@ where } pub fn set(&mut self, state: usize, 
token: Option<T>, action: LRAction<T, R>) { + let _ = self.try_set(state, token, action); + } + + pub fn try_set(&mut self, state: usize, token: Option<T>, action: LRAction<T, R>) -> anyhow::Result<()> { if let Some(token) = token { + if self.action_table[state].contains_key(&token) { + return Err(LRError::new_conflict(&action).into()); + } self.action_table[state].insert(token, action); } else { + if self.eof_action_table[state] != LRAction::None { + return Err(LRError::new_conflict(&action).into()); + } self.eof_action_table[state] = action; } + Ok(()) } pub fn build(self) -> LRTable<T, R> { diff --git a/crates/parse_lr_lr0/src/lib.rs b/crates/parse_lr_lr0/src/lib.rs index e45d2a7..bf4217a 100644 --- a/crates/parse_lr_lr0/src/lib.rs +++ b/crates/parse_lr_lr0/src/lib.rs @@ -102,14 +102,14 @@ where if let Some(rule) = node.find_all_by(is_lr0_reduce_state).next() { // S -> Top . を含むノードに対して Accept をマーク if let Some(_) = node.find_all(&top_dummy).next() { - builder.set(node.id, None, LRAction::Accept); + builder.try_set(node.id, None, LRAction::Accept)?; continue; } // A -> α β . を含むノードに対して Reduce をマーク - builder.set(node.id, None, LRAction::Reduce(rule.clone())); + builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?; for token in source_l.iter() { - builder.set(node.id, Some(token), LRAction::Reduce(rule.clone())); + builder.try_set(node.id, Some(token), LRAction::Reduce(rule.clone()))?; } } } diff --git a/crates/parse_lr_lr1/src/lib.rs b/crates/parse_lr_lr1/src/lib.rs index 03429cb..1f86459 100644 --- a/crates/parse_lr_lr1/src/lib.rs +++ b/crates/parse_lr_lr1/src/lib.rs @@ -105,17 +105,17 @@ where // A -> α β . を含むノードに対して Reduce をマーク match la_token { RuleElem::Term(term) => { - builder.set(node.id, Some(*term), LRAction::Reduce(rule.clone())); + builder.try_set(node.id, Some(*term), LRAction::Reduce(rule.clone()))?; } RuleElem::EOF => { - builder.set(node.id, None, LRAction::Reduce(rule.clone())); + builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?; } _ => {} } // S -> Top . を含むノードに対して Accept をマーク if let Some(_) = node.find_all(&top_dummy).next() { - builder.set(node.id, None, LRAction::Accept); + builder.try_set(node.id, None, LRAction::Accept)?; continue; } } } diff --git a/crates/parse_lr_slr1/src/lib.rs b/crates/parse_lr_slr1/src/lib.rs index e2c076d..4c39188 100644 --- a/crates/parse_lr_slr1/src/lib.rs +++ b/crates/parse_lr_slr1/src/lib.rs @@ -104,7 +104,7 @@ where if let Some(rule) = node.find_all_by(is_slr1_reduce_state).next() { // S -> Top . 
を含むノードに対して Accept をマーク if let Some(_) = node.find_all(&top_dummy).next() { - builder.set(node.id, None, LRAction::Accept); + builder.try_set(node.id, None, LRAction::Accept)?; continue; } @@ -113,10 +113,10 @@ where for term in follow_set.get(lhs).unwrap() { match term { RuleElem::Term(term) => { - builder.set(node.id, Some(*term), LRAction::Reduce(rule.clone())); + builder.try_set(node.id, Some(*term), LRAction::Reduce(rule.clone()))?; } RuleElem::EOF => { - builder.set(node.id, None, LRAction::Reduce(rule.clone())); + builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?; } _ => {} } From 768f4623a74b00d8b5c10f40436a86846db77d09 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 12 Nov 2024 15:01:25 +0900 Subject: [PATCH 48/48] =?UTF-8?q?[fix]=20LRTableBuilder::set=20=E3=81=AE?= =?UTF-8?q?=E6=8C=99=E5=8B=95=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr_common/src/table.rs | 6 +++++- crates/parse_lr_lr0/src/lib.rs | 10 +++++----- crates/parse_lr_lr1/src/lib.rs | 9 ++++----- crates/parse_lr_slr1/src/lib.rs | 14 +++++++------- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs index 583f6a8..bc1be23 100644 --- a/crates/parse_lr_common/src/table.rs +++ b/crates/parse_lr_common/src/table.rs @@ -99,7 +99,11 @@ where } pub fn set(&mut self, state: usize, token: Option<T>, action: LRAction<T, R>) { - let _ = self.try_set(state, token, action); + if let Some(token) = token { + self.action_table[state].insert(token, action); + } else { + self.eof_action_table[state] = action; + } } pub fn try_set(&mut self, state: usize, token: Option<T>, action: LRAction<T, R>) -> anyhow::Result<()> { diff --git a/crates/parse_lr_lr0/src/lib.rs b/crates/parse_lr_lr0/src/lib.rs index bf4217a..7330f0b 100644 --- a/crates/parse_lr_lr0/src/lib.rs +++ b/crates/parse_lr_lr0/src/lib.rs @@ -99,14 +99,14 @@ where let mut builder = LRTableBuilder::from(&dfa); for node in dfa.nodes { let node = node.read().unwrap(); - if let Some(rule) = node.find_all_by(is_lr0_reduce_state).next() { - // S -> Top . を含むノードに対して Accept をマーク - if let Some(_) = node.find_all(&top_dummy).next() { - builder.try_set(node.id, None, LRAction::Accept)?; + for rule in node.find_all_by(is_lr0_reduce_state) { + // S -> Top . を含む場合,EOF 列に対して Accept をマーク + if rule == &top_dummy { + builder.set(node.id, None, LRAction::Accept); continue; } - // A -> α β . を含むノードに対して Reduce をマーク + // A -> α β . を含む場合 全列に Reduce をマーク builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?; for token in source_l.iter() { builder.try_set(node.id, Some(token), LRAction::Reduce(rule.clone()))?; } } diff --git a/crates/parse_lr_lr1/src/lib.rs b/crates/parse_lr_lr1/src/lib.rs index 1f86459..bdf78d8 100644 --- a/crates/parse_lr_lr1/src/lib.rs +++ b/crates/parse_lr_lr1/src/lib.rs @@ -102,7 +102,7 @@ where for node in &dfa.nodes { let node = node.read().unwrap(); for (rule, la_token) in node.find_all_by(is_lr1_reduce_state) { - // A -> α β . を含むノードに対して Reduce をマーク + // A -> α β . [la_token] を含む場合,la_token 列に対して Reduce をマーク match la_token { RuleElem::Term(term) => { builder.try_set(node.id, Some(*term), LRAction::Reduce(rule.clone()))?; @@ -113,10 +113,9 @@ where _ => {} } - // S -> Top . 
を含む場合,EOF 列に対して Accept をマーク + if rule == &top_dummy { + builder.set(node.id, None, LRAction::Accept); } } } diff --git a/crates/parse_lr_slr1/src/lib.rs b/crates/parse_lr_slr1/src/lib.rs index e2c076d..4c39188 100644 --- a/crates/parse_lr_slr1/src/lib.rs +++ b/crates/parse_lr_slr1/src/lib.rs @@ -101,14 +101,9 @@ where let mut builder = LRTableBuilder::from(&dfa); for node in dfa.nodes { let node = node.read().unwrap(); - if let Some(rule) = node.find_all_by(is_slr1_reduce_state).next() { - // S -> Top . を含むノードに対して Accept をマーク - if let Some(_) = node.find_all(&top_dummy).next() { - builder.try_set(node.id, None, LRAction::Accept)?; - continue; - } - // A -> α β . を含むノードに対して Reduce をマーク + // A -> α β . を含む場合,Follow(A) 列に対して Reduce をマーク + for rule in node.find_all_by(is_slr1_reduce_state) { let lhs = lhs_as_str(&rule.lhs); for term in follow_set.get(lhs).unwrap() { match term { @@ -121,6 +116,11 @@ where _ => {} } } + + // S -> Top . を含む場合,EOF 列に対して Accept をマーク + if rule == &top_dummy { + builder.set(node.id, None, LRAction::Accept); + } } } let table = builder.build();
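
A few short, self-contained sketches follow for the constructions these patches rely on. PATCH 40 deletes RuleSet::{first_set, nulls_set} in favor of the FirstSet type from copager_parse_common; the underlying algorithm is the usual pair of fixed-point iterations (nullable set, then FIRST sets). Below is a minimal Rust sketch of that computation; Sym and first_sets are illustrative names only, not part of the copager API.

use std::collections::{HashMap, HashSet};

#[derive(Clone, Debug)]
enum Sym {
    Term(char),
    NonTerm(&'static str),
}

fn first_sets(rules: &[(&'static str, Vec<Sym>)]) -> HashMap<&'static str, HashSet<char>> {
    // Pass 1: nullable nonterminals, computed to a fixed point.
    let mut nullable: HashSet<&'static str> = HashSet::new();
    let mut changed = true;
    while changed {
        changed = false;
        for (lhs, rhs) in rules {
            let all_nullable = rhs.iter().all(|sym| match sym {
                Sym::NonTerm(n) => nullable.contains(n),
                Sym::Term(_) => false,
            });
            if all_nullable && nullable.insert(*lhs) {
                changed = true;
            }
        }
    }

    // Pass 2: FIRST sets, also to a fixed point. Walk each rhs left to
    // right, absorbing FIRST of each symbol, stopping at the first
    // non-nullable one.
    let mut first: HashMap<&'static str, HashSet<char>> = HashMap::new();
    let mut changed = true;
    while changed {
        changed = false;
        for (lhs, rhs) in rules {
            let mut found: HashSet<char> = HashSet::new();
            for sym in rhs {
                match sym {
                    Sym::Term(t) => {
                        found.insert(*t);
                        break;
                    }
                    Sym::NonTerm(n) => {
                        if let Some(f) = first.get(n) {
                            found.extend(f.iter().copied());
                        }
                        if !nullable.contains(n) {
                            break;
                        }
                    }
                }
            }
            let entry = first.entry(*lhs).or_default();
            let before = entry.len();
            entry.extend(found);
            if entry.len() > before {
                changed = true;
            }
        }
    }
    first
}

fn main() {
    // E -> T n | ε ;  T -> ( E ) | ε
    let rules = vec![
        ("E", vec![Sym::NonTerm("T"), Sym::Term('n')]),
        ("E", vec![]),
        ("T", vec![Sym::Term('('), Sym::NonTerm("E"), Sym::Term(')')]),
        ("T", vec![]),
    ];
    // Prints FIRST(E) = { '(', 'n' } and FIRST(T) = { '(' }.
    println!("{:?}", first_sets(&rules));
}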
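
The LR1ItemSet::expand / expand_once pair near the top of this section is the LR(1) closure: for an item A -> α . B γ with lookahead a, every rule B -> δ joins the set as B -> . δ with lookaheads drawn from FIRST(γ a), repeated until nothing new appears. Here is a compact stand-alone sketch of that closure on the classic grammar S -> C C, C -> c C | d; the types are illustrative, and FIRST is hard-coded because this grammar has no ε-rules.

use std::collections::HashSet;

#[derive(Clone, PartialEq, Eq, Hash, Debug)]
struct Item {
    lhs: &'static str,
    rhs: Vec<&'static str>,
    dot: usize,
    la: &'static str, // one lookahead terminal; "$" stands for EOF
}

// Grammar: S -> C C ; C -> c C | d  (plus the start item S' -> . S below)
const RULES: &[(&str, &[&str])] = &[
    ("S", &["C", "C"]),
    ("C", &["c", "C"]),
    ("C", &["d"]),
];

fn is_nonterm(s: &str) -> bool {
    matches!(s, "S" | "C" | "S'")
}

// FIRST(β a), specialized: β's first symbol if it is a terminal,
// FIRST of the nonterminal otherwise (FIRST(S) = FIRST(C) = {c, d}),
// and {a} when β is empty.
fn first_seq(beta: &[&'static str], la: &'static str) -> Vec<&'static str> {
    match beta.first() {
        None => vec![la],
        Some(&s) if !is_nonterm(s) => vec![s],
        Some(_) => vec!["c", "d"],
    }
}

// Fixed-point closure, the same shape as LR1ItemSet::expand above:
// for every A -> α . B γ [a], add B -> . δ [b] for each b in FIRST(γ a).
fn closure(mut items: HashSet<Item>) -> HashSet<Item> {
    loop {
        let mut new_items = Vec::new();
        for item in &items {
            let Some(next) = item.rhs.get(item.dot) else { continue };
            if !is_nonterm(next) {
                continue;
            }
            for (lhs, rhs) in RULES.iter().copied().filter(|(l, _)| l == next) {
                for la in first_seq(&item.rhs[item.dot + 1..], item.la) {
                    new_items.push(Item { lhs, rhs: rhs.to_vec(), dot: 0, la });
                }
            }
        }
        let before = items.len();
        items.extend(new_items);
        if items.len() == before {
            return items;
        }
    }
}

fn main() {
    let start = Item { lhs: "S'", rhs: vec!["S"], dot: 0, la: "$" };
    for item in closure(HashSet::from([start])) {
        // Yields S -> . C C [$] and C -> . c C / . d with lookaheads c and d.
        println!("{:?}", item);
    }
}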
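
PATCH 47 turns a second write into the same parse-table cell into LRError::Conflict, and PATCH 48 then re-splits the builder API: set writes unconditionally, try_set reports a conflict. The sketch below models only that mechanism; SimpleAction and SimpleBuilder are simplified stand-ins, not copager types.

use std::collections::HashMap;

#[derive(Clone, Debug, PartialEq, Eq)]
enum SimpleAction {
    Shift(usize),
    Reduce(&'static str), // rule name instead of Rule<T, R>
    Accept,
    None,
}

struct SimpleBuilder {
    // One row per automaton state; the EOF column is kept separately,
    // mirroring action_table / eof_action_table in the patches.
    action: Vec<HashMap<char, SimpleAction>>,
    eof_action: Vec<SimpleAction>,
}

impl SimpleBuilder {
    fn new(states: usize) -> Self {
        SimpleBuilder {
            action: vec![HashMap::new(); states],
            eof_action: vec![SimpleAction::None; states],
        }
    }

    // Unconditional write: the last caller wins (used for Accept in PATCH 48).
    fn set(&mut self, state: usize, token: Option<char>, act: SimpleAction) {
        match token {
            Some(t) => {
                self.action[state].insert(t, act);
            }
            None => self.eof_action[state] = act,
        }
    }

    // Checked write: a second write to the same cell is a conflict.
    fn try_set(&mut self, state: usize, token: Option<char>, act: SimpleAction) -> Result<(), String> {
        let occupied = match token {
            Some(t) => self.action[state].contains_key(&t),
            None => self.eof_action[state] != SimpleAction::None,
        };
        if occupied {
            return Err(format!("Conflict occurred at [{:?}]", act));
        }
        self.set(state, token, act);
        Ok(())
    }
}

fn main() {
    let mut b = SimpleBuilder::new(1);
    b.try_set(0, Some('a'), SimpleAction::Shift(1)).unwrap();
    // Writing the same cell again reports a shift/reduce conflict
    // instead of silently overwriting it.
    assert!(b.try_set(0, Some('a'), SimpleAction::Reduce("E -> E + E")).is_err());
    // Accept may overwrite a Reduce already marked on the EOF column.
    b.try_set(0, None, SimpleAction::Reduce("S' -> S")).unwrap();
    b.set(0, None, SimpleAction::Accept);
    assert_eq!(b.eof_action[0], SimpleAction::Accept);
}

The split exists because the accept state's item S' -> Top . also matches the generic "dot at the end" reduce filter, so its EOF cell is first marked Reduce via try_set and then deliberately overwritten with Accept via the unchecked set; marking Accept with try_set would report a conflict that is not really there.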
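
Finally, PATCH 41 makes each parser cacheable: the expensive artifact (the LR table) is the Cache associated type, produced once by new and re-wrapped by restore, with serde handling persistence. A toy round trip of that shape is sketched below. The exact trait bounds and the use of serde_json and anyhow as the serialization and error crates are assumptions made for illustration; the real trait lives in copager_utils::cache and is not fully shown in these patches.

use serde::{Serialize, Deserialize};

// Illustrative mirror of the Cacheable shape from PATCH 41.
trait Cacheable<Src>: Sized {
    type Cache: Serialize + for<'de> Deserialize<'de>;
    fn new(source: Src) -> anyhow::Result<Self::Cache>;
    fn restore(cache: Self::Cache) -> Self;
}

#[derive(Serialize, Deserialize)]
struct ToyTable {
    states: Vec<u8>,
}

struct ToyParser {
    table: ToyTable,
}

impl Cacheable<u8> for ToyParser {
    type Cache = ToyTable;
    fn new(n: u8) -> anyhow::Result<Self::Cache> {
        // Stand-in for the expensive DFA / table construction.
        Ok(ToyTable { states: (0..n).collect() })
    }
    fn restore(table: Self::Cache) -> Self {
        ToyParser { table }
    }
}

fn main() -> anyhow::Result<()> {
    let cache = ToyParser::new(3)?;              // built once
    let bytes = serde_json::to_vec(&cache)?;     // e.g. written to disk
    let parser = ToyParser::restore(serde_json::from_slice(&bytes)?);
    assert_eq!(parser.table.states, vec![0, 1, 2]);
    Ok(())
}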