diff --git a/crates/algorithm_lr1/src/builder.rs b/crates/algorithm_lr1/src/builder.rs index 5635f8f..488b96f 100644 --- a/crates/algorithm_lr1/src/builder.rs +++ b/crates/algorithm_lr1/src/builder.rs @@ -37,11 +37,8 @@ where { pub fn setup() -> anyhow::Result { // 1. Pre-process - let rules = S::try_into()? - .into_iter() - .map(|(rule, _)| rule) - .collect::>(); - let ruleset = RuleSet::from(rules); + let rules = S::into_iter().collect::>(); + let ruleset = S::into_ruleset(); let first_set = ruleset.first_set(); // 2. Generate dummy nonterm @@ -76,7 +73,7 @@ where let mut goto_table: Vec> = Vec::with_capacity(dfa.0.len()); for _ in 0..dfa.0.len() { action_table.push(HashMap::from_iter( - T::enum_iter() + T::into_iter() .map(|token| (token, LRAction::None)) .collect::)>>(), )); @@ -85,7 +82,6 @@ where } // 5. Setup tables - let rule_table: Vec = S::enum_iter().collect(); for lritem_set in &dfa.0 { for (token, next) in &lritem_set.next { match &token { @@ -113,7 +109,7 @@ where let id = lritem_set.id as usize; let label = action_table[id].get_mut(&t.0).unwrap(); *label = LRAction::Reduce( - rule_table[item.rule.id as usize], + rules[item.rule.id as usize], *nonterm_table.get(lhs).unwrap(), item.rule.rhs.len(), ); @@ -124,7 +120,7 @@ where LRAction::Accept } else { LRAction::Reduce( - rule_table[item.rule.id as usize], + rules[item.rule.id as usize], *nonterm_table.get(lhs).unwrap(), item.rule.rhs.len(), ) diff --git a/crates/algorithm_lr1/src/lib.rs b/crates/algorithm_lr1/src/lib.rs index a5db5a4..88cd782 100644 --- a/crates/algorithm_lr1/src/lib.rs +++ b/crates/algorithm_lr1/src/lib.rs @@ -68,19 +68,14 @@ mod test { #[derive(Debug, Clone, Copy, Syntax)] enum TestSyntax { #[rule(" ::= Plus ")] - ExprPlus, #[rule(" ::= Minus ")] - ExprMinus, #[rule(" ::= ")] - ExprTerm, + Expr, #[rule(" ::= Mul ")] - TermMul, #[rule(" ::= Div ")] - TermDiv, #[rule(" ::= ")] - TermNum, + Term, #[rule(" ::= BracketL BracketR")] - NestedNum, #[rule(" ::= Num")] Num, } diff --git a/crates/core/src/cfg/syntax.rs b/crates/core/src/cfg/syntax.rs index 0b631ab..a41d085 100644 --- a/crates/core/src/cfg/syntax.rs +++ b/crates/core/src/cfg/syntax.rs @@ -7,17 +7,26 @@ use super::token::TokenSet; pub trait Syntax<'a> where - Self: Debug + Clone + Copy + Sized, + Self: Debug + Clone + Copy, { type TokenSet: TokenSet<'a>; - fn enum_iter() -> impl Iterator; - fn to_rule(&self) -> Rule<'a, Self::TokenSet>; + fn into_iter() -> impl Iterator; + fn into_rules(&self) -> Vec>; - fn try_into() -> anyhow::Result, Self)>> { - Self::enum_iter() - .map(|elem| Ok((Self::to_rule(&elem), elem))) - .collect::>>() + fn into_ruleset() -> RuleSet<'a, Self::TokenSet> { + let rules = Self::into_iter() + .enumerate() + .flat_map(|(idx, elem)| { + let mut rules = Self::into_rules(&elem); + for rule in &mut rules { + rule.id = idx; + } + rules + }) + .collect::>(); + + RuleSet::from(rules) } } @@ -108,16 +117,12 @@ pub struct RuleSet<'a, T: TokenSet<'a>> { } impl<'a, T: TokenSet<'a>> From>> for RuleSet<'a, T> { - fn from(mut rules: Vec>) -> Self { + fn from(rules: Vec>) -> Self { let top = match &rules[0].lhs { RuleElem::NonTerm(s) => s.clone(), _ => unreachable!(), }; - for (idx, rule) in rules.iter_mut().enumerate() { - rule.id = idx; - } - RuleSet { top, rules, @@ -228,7 +233,7 @@ impl<'a, T: TokenSet<'a>> RuleSet<'a, T> { mod test { use std::collections::HashMap; - use super::{TokenSet, Syntax, Rule, RuleElem, RuleSet}; + use super::{TokenSet, Syntax, Rule, RuleElem}; #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] enum TestToken { @@ -242,7 +247,7 @@ mod test { } impl TokenSet<'_> for TestToken { - fn enum_iter() -> impl Iterator { + fn into_iter() -> impl Iterator { Box::new( vec![ TestToken::Num, @@ -257,7 +262,7 @@ mod test { ) } - fn to_regex(&self) -> &'static str { + fn into_regex_str(&self) -> &'static str { match self { TestToken::Num => r"^[1-9][0-9]*", TestToken::Plus => r"^\+", @@ -289,7 +294,7 @@ mod test { impl<'a> Syntax<'a> for TestSyntax { type TokenSet = TestToken; - fn enum_iter() -> impl Iterator { + fn into_iter() -> impl Iterator { Box::new( vec![ TestSyntax::ExprPlus, @@ -305,7 +310,7 @@ mod test { ) } - fn to_rule(&self) -> Rule<'a, Self::TokenSet> { + fn into_rules(&self) -> Vec> { let expr_plus = Rule::from(( RuleElem::new_nonterm("expr"), vec![ @@ -364,14 +369,14 @@ mod test { let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![])); match self { - TestSyntax::ExprPlus => expr_plus, - TestSyntax::ExprMinus => expr_minus, - TestSyntax::Expr2Term => expr_2_term, - TestSyntax::TermMul => term_mul, - TestSyntax::TermDiv => term_div, - TestSyntax::Term2Fact => term_2_fact, - TestSyntax::Fact2Expr => fact_2_expr, - TestSyntax::Fact2Num => fact_2_num, + TestSyntax::ExprPlus => vec![expr_plus], + TestSyntax::ExprMinus => vec![expr_minus], + TestSyntax::Expr2Term => vec![expr_2_term], + TestSyntax::TermMul => vec![term_mul], + TestSyntax::TermDiv => vec![term_div], + TestSyntax::Term2Fact => vec![term_2_fact], + TestSyntax::Fact2Expr => vec![fact_2_expr], + TestSyntax::Fact2Num => vec![fact_2_num], } } } @@ -399,12 +404,7 @@ mod test { #[test] fn first_set() { - let rules = ::try_into() - .unwrap() - .into_iter() - .map(|(rule, _)| rule) - .collect::>(); - let ruleset = RuleSet::from(rules); + let ruleset = ::into_ruleset(); let first_set = ruleset.first_set(); check( diff --git a/crates/core/src/cfg/token.rs b/crates/core/src/cfg/token.rs index 7eb1032..e3a6d3f 100644 --- a/crates/core/src/cfg/token.rs +++ b/crates/core/src/cfg/token.rs @@ -1,19 +1,25 @@ use std::fmt::Debug; use std::hash::Hash; -use regex::Regex; +use regex::{Regex, RegexSet}; pub trait TokenSet<'a> where Self: Debug + Copy + Clone + Hash + Eq, { fn ignore_str() -> &'a str; - fn enum_iter() -> impl Iterator; - fn to_regex(&self) -> &'a str; + fn into_iter() -> impl Iterator; + fn into_regex_str(&self) -> &'a str; - fn try_into() -> anyhow::Result> { - Self::enum_iter() - .map(|token| Ok((Regex::new(Self::to_regex(&token))?, token))) - .collect::>>() + fn into_regex(&self) -> anyhow::Result { + Ok(Regex::new(self.into_regex_str())?) + } + + fn try_into_regexset() -> anyhow::Result { + let regex_set = Self::into_iter() + .map(|token| Self::into_regex_str(&token)) + .collect::>(); + + Ok(RegexSet::new(regex_set)?) } } diff --git a/crates/core/src/lex.rs b/crates/core/src/lex.rs index 44b2484..240da9f 100644 --- a/crates/core/src/lex.rs +++ b/crates/core/src/lex.rs @@ -41,9 +41,9 @@ impl Lexer { struct LexDriver<'a, 'b, T: TokenSet<'a>> { // Regex + regex_istr: Regex, regex_set: RegexSet, regex_map: Vec<(Regex, T)>, - regex_istr: Regex, // State input: &'b str, @@ -57,18 +57,16 @@ impl<'a, 'b, T: TokenSet<'a>> TryFrom<&'b str> for LexDriver<'a, 'b, T> { type Error = anyhow::Error; fn try_from(input: &'b str) -> anyhow::Result { - let regex_map = T::try_into()?; - let regex_set = regex_map - .iter() - .map(|(_, token)| T::to_regex(&token)) - .collect::>(); - let regex_set = RegexSet::new(regex_set)?; let regex_istr = Regex::new(T::ignore_str())?; + let regex_set = T::try_into_regexset()?; + let regex_map = T::into_iter() + .map(|token| Ok((token.into_regex()?, token))) + .collect::>>()?; Ok(LexDriver { + regex_istr, regex_set, regex_map, - regex_istr, input, pos: 0, tokenset: PhantomData, @@ -126,11 +124,11 @@ mod test { r"^[ \t\n]+" } - fn enum_iter() -> Box> { - Box::new(vec![TestToken::Num, TestToken::Plus].into_iter()) + fn into_iter() -> impl Iterator { + vec![TestToken::Num, TestToken::Plus].into_iter() } - fn to_regex(&self) -> &'static str { + fn into_regex_str(&self) -> &'static str { match self { TestToken::Num => r"^[1-9][0-9]*", TestToken::Plus => r"^\+", diff --git a/crates/core_derive/src/impl/syntax.rs b/crates/core_derive/src/impl/syntax.rs index b1e8574..83324ce 100644 --- a/crates/core_derive/src/impl/syntax.rs +++ b/crates/core_derive/src/impl/syntax.rs @@ -31,13 +31,13 @@ pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream { impl<'a> Syntax<'a> for #enum_name { type TokenSet = #enum_assoc_type; - fn enum_iter() -> impl Iterator { + fn into_iter() -> impl Iterator { vec![ #( #enum_variants, )* ].into_iter() } - fn to_rule(&self) -> Rule<'a, Self::TokenSet> { + fn into_rules(&self) -> Vec> { match self { #( #enum_rule_table, )* _ => unimplemented!(), @@ -50,23 +50,23 @@ pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream { struct VariantInfo<'a> { parent_ident: &'a Ident, self_ident: &'a Ident, - rule: Option, + rules: Vec, } impl<'a> VariantInfo<'a> { fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> { let self_ident = &variant.ident; - let mut rule = None; + let mut rules = vec![]; for attr in &variant.attrs { let attr = attr.parse_args::().unwrap().value(); - rule = Some(Self::parse_rule(&attr)); + rules.push(Self::parse_rule(&attr)); } VariantInfo { parent_ident, self_ident, - rule, + rules, } } @@ -102,9 +102,11 @@ impl<'a> VariantInfo<'a> { fn gen_ident_with_rule(&self) -> TokenStream { let ident = self.gen_ident(); - match &self.rule { - Some(rule) => quote! { #ident => #rule }, - None => quote! { unimplemented!() }, + if self.rules.is_empty() { + quote! { #ident => unimplemented!() } + } else { + let rules = &self.rules; + quote! { #ident => vec![#(#rules),*] } } } } diff --git a/crates/core_derive/src/impl/tokenset.rs b/crates/core_derive/src/impl/tokenset.rs index 84bcef3..1cd4df7 100644 --- a/crates/core_derive/src/impl/tokenset.rs +++ b/crates/core_derive/src/impl/tokenset.rs @@ -36,13 +36,13 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { #enum_ignored } - fn enum_iter() -> impl Iterator { + fn into_iter() -> impl Iterator { vec![ #( #enum_variants, )* ].into_iter() } - fn to_regex(&self) -> &'static str { + fn into_regex_str(&self) -> &'static str { match self { #( #enum_regex_table, )* _ => unimplemented!(), diff --git a/examples/expr.rs b/examples/expr.rs index 6ae7d5d..a2a33b5 100644 --- a/examples/expr.rs +++ b/examples/expr.rs @@ -28,19 +28,14 @@ enum ExprTokenSet { #[derive(Debug, Clone, Copy, Syntax)] enum ExprSyntax { #[rule(" ::= Plus ")] - ExprPlus, #[rule(" ::= Minus ")] - ExprMinus, #[rule(" ::= ")] - ExprTerm, + Expr, #[rule(" ::= Mul ")] - TermMul, #[rule(" ::= Div ")] - TermDiv, #[rule(" ::= ")] - TermNum, + Term, #[rule(" ::= BracketL BracketR")] - NestedNum, #[rule(" ::= Num")] Num, } diff --git a/tests/derive.rs b/tests/derive.rs index 81e8db1..87d107f 100644 --- a/tests/derive.rs +++ b/tests/derive.rs @@ -23,25 +23,20 @@ enum TestTokenSet { #[derive(Debug, Clone, Copy, Syntax)] enum TestSyntax { #[rule(" ::= Plus ")] - ExprPlus, #[rule(" ::= Minus ")] - ExprMinus, #[rule(" ::= ")] - ExprTerm, + Expr, #[rule(" ::= Mul ")] - TermMul, #[rule(" ::= Div ")] - TermDiv, #[rule(" ::= ")] - TermNum, + Term, #[rule(" ::= BracketL BracketR")] - NestedNum, #[rule(" ::= Num")] Num, } #[test] fn check_compile() { - let _ = TestTokenSet::to_regex(&self::TestTokenSet::Plus); - let _ = TestSyntax::to_rule(&self::TestSyntax::ExprPlus); + let _ = TestTokenSet::into_regex(&self::TestTokenSet::Plus); + let _ = TestSyntax::into_rules(&self::TestSyntax::Expr); } diff --git a/tests/serde.rs b/tests/serde.rs index 24ddfba..ce2a274 100644 --- a/tests/serde.rs +++ b/tests/serde.rs @@ -27,19 +27,14 @@ enum TestTokenSet { #[derive(Debug, Clone, Copy, Serialize, Deserialize, Syntax)] enum TestSyntax { #[rule(" ::= Plus ")] - ExprPlus, #[rule(" ::= Minus ")] - ExprMinus, #[rule(" ::= ")] - ExprTerm, + Expr, #[rule(" ::= Mul ")] - TermMul, #[rule(" ::= Div ")] - TermDiv, #[rule(" ::= ")] - TermNum, + Term, #[rule(" ::= BracketL BracketR")] - NestedNum, #[rule(" ::= Num")] Num, }