Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 5 additions & 9 deletions crates/algorithm_lr1/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,8 @@ where
{
pub fn setup() -> anyhow::Result<Self> {
// 1. Pre-process
let rules = S::try_into()?
.into_iter()
.map(|(rule, _)| rule)
.collect::<Vec<_>>();
let ruleset = RuleSet::from(rules);
let rules = S::into_iter().collect::<Vec<_>>();
let ruleset = S::into_ruleset();
let first_set = ruleset.first_set();

// 2. Generate dummy nonterm
Expand Down Expand Up @@ -76,7 +73,7 @@ where
let mut goto_table: Vec<Vec<usize>> = Vec::with_capacity(dfa.0.len());
for _ in 0..dfa.0.len() {
action_table.push(HashMap::from_iter(
T::enum_iter()
T::into_iter()
.map(|token| (token, LRAction::None))
.collect::<Vec<(T, LRAction<S>)>>(),
));
Expand All @@ -85,7 +82,6 @@ where
}

// 5. Setup tables
let rule_table: Vec<S> = S::enum_iter().collect();
for lritem_set in &dfa.0 {
for (token, next) in &lritem_set.next {
match &token {
Expand Down Expand Up @@ -113,7 +109,7 @@ where
let id = lritem_set.id as usize;
let label = action_table[id].get_mut(&t.0).unwrap();
*label = LRAction::Reduce(
rule_table[item.rule.id as usize],
rules[item.rule.id as usize],
*nonterm_table.get(lhs).unwrap(),
item.rule.rhs.len(),
);
Expand All @@ -124,7 +120,7 @@ where
LRAction::Accept
} else {
LRAction::Reduce(
rule_table[item.rule.id as usize],
rules[item.rule.id as usize],
*nonterm_table.get(lhs).unwrap(),
item.rule.rhs.len(),
)
Expand Down
9 changes: 2 additions & 7 deletions crates/algorithm_lr1/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,19 +68,14 @@ mod test {
#[derive(Debug, Clone, Copy, Syntax)]
enum TestSyntax {
#[rule("<expr> ::= <expr> Plus <term>")]
ExprPlus,
#[rule("<expr> ::= <expr> Minus <term>")]
ExprMinus,
#[rule("<expr> ::= <term>")]
ExprTerm,
Expr,
#[rule("<term> ::= <term> Mul <num>")]
TermMul,
#[rule("<term> ::= <term> Div <num>")]
TermDiv,
#[rule("<term> ::= <num>")]
TermNum,
Term,
#[rule("<num> ::= BracketL <expr> BracketR")]
NestedNum,
#[rule("<num> ::= Num")]
Num,
}
Expand Down
62 changes: 31 additions & 31 deletions crates/core/src/cfg/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,26 @@ use super::token::TokenSet;

pub trait Syntax<'a>
where
Self: Debug + Clone + Copy + Sized,
Self: Debug + Clone + Copy,
{
type TokenSet: TokenSet<'a>;

fn enum_iter() -> impl Iterator<Item = Self>;
fn to_rule(&self) -> Rule<'a, Self::TokenSet>;
fn into_iter() -> impl Iterator<Item = Self>;
fn into_rules(&self) -> Vec<Rule<'a, Self::TokenSet>>;

fn try_into() -> anyhow::Result<Vec<(Rule<'a, Self::TokenSet>, Self)>> {
Self::enum_iter()
.map(|elem| Ok((Self::to_rule(&elem), elem)))
.collect::<anyhow::Result<Vec<_>>>()
fn into_ruleset() -> RuleSet<'a, Self::TokenSet> {
let rules = Self::into_iter()
.enumerate()
.flat_map(|(idx, elem)| {
let mut rules = Self::into_rules(&elem);
for rule in &mut rules {
rule.id = idx;
}
rules
})
.collect::<Vec<_>>();

RuleSet::from(rules)
}
}

Expand Down Expand Up @@ -108,16 +117,12 @@ pub struct RuleSet<'a, T: TokenSet<'a>> {
}

impl<'a, T: TokenSet<'a>> From<Vec<Rule<'a, T>>> for RuleSet<'a, T> {
fn from(mut rules: Vec<Rule<'a, T>>) -> Self {
fn from(rules: Vec<Rule<'a, T>>) -> Self {
let top = match &rules[0].lhs {
RuleElem::NonTerm(s) => s.clone(),
_ => unreachable!(),
};

for (idx, rule) in rules.iter_mut().enumerate() {
rule.id = idx;
}

RuleSet {
top,
rules,
Expand Down Expand Up @@ -228,7 +233,7 @@ impl<'a, T: TokenSet<'a>> RuleSet<'a, T> {
mod test {
use std::collections::HashMap;

use super::{TokenSet, Syntax, Rule, RuleElem, RuleSet};
use super::{TokenSet, Syntax, Rule, RuleElem};

#[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)]
enum TestToken {
Expand All @@ -242,7 +247,7 @@ mod test {
}

impl TokenSet<'_> for TestToken {
fn enum_iter() -> impl Iterator<Item = Self> {
fn into_iter() -> impl Iterator<Item = Self> {
Box::new(
vec![
TestToken::Num,
Expand All @@ -257,7 +262,7 @@ mod test {
)
}

fn to_regex(&self) -> &'static str {
fn into_regex_str(&self) -> &'static str {
match self {
TestToken::Num => r"^[1-9][0-9]*",
TestToken::Plus => r"^\+",
Expand Down Expand Up @@ -289,7 +294,7 @@ mod test {
impl<'a> Syntax<'a> for TestSyntax {
type TokenSet = TestToken;

fn enum_iter() -> impl Iterator<Item = Self> {
fn into_iter() -> impl Iterator<Item = Self> {
Box::new(
vec![
TestSyntax::ExprPlus,
Expand All @@ -305,7 +310,7 @@ mod test {
)
}

fn to_rule(&self) -> Rule<'a, Self::TokenSet> {
fn into_rules(&self) -> Vec<Rule<'a, Self::TokenSet>> {
let expr_plus = Rule::from((
RuleElem::new_nonterm("expr"),
vec![
Expand Down Expand Up @@ -364,14 +369,14 @@ mod test {
let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![]));

match self {
TestSyntax::ExprPlus => expr_plus,
TestSyntax::ExprMinus => expr_minus,
TestSyntax::Expr2Term => expr_2_term,
TestSyntax::TermMul => term_mul,
TestSyntax::TermDiv => term_div,
TestSyntax::Term2Fact => term_2_fact,
TestSyntax::Fact2Expr => fact_2_expr,
TestSyntax::Fact2Num => fact_2_num,
TestSyntax::ExprPlus => vec![expr_plus],
TestSyntax::ExprMinus => vec![expr_minus],
TestSyntax::Expr2Term => vec![expr_2_term],
TestSyntax::TermMul => vec![term_mul],
TestSyntax::TermDiv => vec![term_div],
TestSyntax::Term2Fact => vec![term_2_fact],
TestSyntax::Fact2Expr => vec![fact_2_expr],
TestSyntax::Fact2Num => vec![fact_2_num],
}
}
}
Expand Down Expand Up @@ -399,12 +404,7 @@ mod test {

#[test]
fn first_set() {
let rules = <TestSyntax as Syntax>::try_into()
.unwrap()
.into_iter()
.map(|(rule, _)| rule)
.collect::<Vec<_>>();
let ruleset = RuleSet::from(rules);
let ruleset = <TestSyntax as Syntax>::into_ruleset();
let first_set = ruleset.first_set();

check(
Expand Down
20 changes: 13 additions & 7 deletions crates/core/src/cfg/token.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
use std::fmt::Debug;
use std::hash::Hash;

use regex::Regex;
use regex::{Regex, RegexSet};

pub trait TokenSet<'a>
where
Self: Debug + Copy + Clone + Hash + Eq,
{
fn ignore_str() -> &'a str;
fn enum_iter() -> impl Iterator<Item = Self>;
fn to_regex(&self) -> &'a str;
fn into_iter() -> impl Iterator<Item = Self>;
fn into_regex_str(&self) -> &'a str;

fn try_into() -> anyhow::Result<Vec<(Regex, Self)>> {
Self::enum_iter()
.map(|token| Ok((Regex::new(Self::to_regex(&token))?, token)))
.collect::<anyhow::Result<Vec<_>>>()
fn into_regex(&self) -> anyhow::Result<Regex> {
Ok(Regex::new(self.into_regex_str())?)
}

fn try_into_regexset() -> anyhow::Result<RegexSet> {
let regex_set = Self::into_iter()
.map(|token| Self::into_regex_str(&token))
.collect::<Vec<_>>();

Ok(RegexSet::new(regex_set)?)
}
}
20 changes: 9 additions & 11 deletions crates/core/src/lex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ impl Lexer {

struct LexDriver<'a, 'b, T: TokenSet<'a>> {
// Regex
regex_istr: Regex,
regex_set: RegexSet,
regex_map: Vec<(Regex, T)>,
regex_istr: Regex,

// State
input: &'b str,
Expand All @@ -57,18 +57,16 @@ impl<'a, 'b, T: TokenSet<'a>> TryFrom<&'b str> for LexDriver<'a, 'b, T> {
type Error = anyhow::Error;

fn try_from(input: &'b str) -> anyhow::Result<Self> {
let regex_map = T::try_into()?;
let regex_set = regex_map
.iter()
.map(|(_, token)| T::to_regex(&token))
.collect::<Vec<_>>();
let regex_set = RegexSet::new(regex_set)?;
let regex_istr = Regex::new(T::ignore_str())?;
let regex_set = T::try_into_regexset()?;
let regex_map = T::into_iter()
.map(|token| Ok((token.into_regex()?, token)))
.collect::<anyhow::Result<Vec<_>>>()?;

Ok(LexDriver {
regex_istr,
regex_set,
regex_map,
regex_istr,
input,
pos: 0,
tokenset: PhantomData,
Expand Down Expand Up @@ -126,11 +124,11 @@ mod test {
r"^[ \t\n]+"
}

fn enum_iter() -> Box<dyn Iterator<Item = Self>> {
Box::new(vec![TestToken::Num, TestToken::Plus].into_iter())
fn into_iter() -> impl Iterator<Item = Self> {
vec![TestToken::Num, TestToken::Plus].into_iter()
}

fn to_regex(&self) -> &'static str {
fn into_regex_str(&self) -> &'static str {
match self {
TestToken::Num => r"^[1-9][0-9]*",
TestToken::Plus => r"^\+",
Expand Down
20 changes: 11 additions & 9 deletions crates/core_derive/src/impl/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream {
impl<'a> Syntax<'a> for #enum_name {
type TokenSet = #enum_assoc_type;

fn enum_iter() -> impl Iterator<Item = Self> {
fn into_iter() -> impl Iterator<Item = Self> {
vec![
#( #enum_variants, )*
].into_iter()
}

fn to_rule(&self) -> Rule<'a, Self::TokenSet> {
fn into_rules(&self) -> Vec<Rule<'a, Self::TokenSet>> {
match self {
#( #enum_rule_table, )*
_ => unimplemented!(),
Expand All @@ -50,23 +50,23 @@ pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream {
struct VariantInfo<'a> {
parent_ident: &'a Ident,
self_ident: &'a Ident,
rule: Option<TokenStream>,
rules: Vec<TokenStream>,
}

impl<'a> VariantInfo<'a> {
fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> {
let self_ident = &variant.ident;

let mut rule = None;
let mut rules = vec![];
for attr in &variant.attrs {
let attr = attr.parse_args::<LitStr>().unwrap().value();
rule = Some(Self::parse_rule(&attr));
rules.push(Self::parse_rule(&attr));
}

VariantInfo {
parent_ident,
self_ident,
rule,
rules,
}
}

Expand Down Expand Up @@ -102,9 +102,11 @@ impl<'a> VariantInfo<'a> {

fn gen_ident_with_rule(&self) -> TokenStream {
let ident = self.gen_ident();
match &self.rule {
Some(rule) => quote! { #ident => #rule },
None => quote! { unimplemented!() },
if self.rules.is_empty() {
quote! { #ident => unimplemented!() }
} else {
let rules = &self.rules;
quote! { #ident => vec![#(#rules),*] }
}
}
}
4 changes: 2 additions & 2 deletions crates/core_derive/src/impl/tokenset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream {
#enum_ignored
}

fn enum_iter() -> impl Iterator<Item = Self> {
fn into_iter() -> impl Iterator<Item = Self> {
vec![
#( #enum_variants, )*
].into_iter()
}

fn to_regex(&self) -> &'static str {
fn into_regex_str(&self) -> &'static str {
match self {
#( #enum_regex_table, )*
_ => unimplemented!(),
Expand Down
9 changes: 2 additions & 7 deletions examples/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,14 @@ enum ExprTokenSet {
#[derive(Debug, Clone, Copy, Syntax)]
enum ExprSyntax {
#[rule("<expr> ::= <expr> Plus <term>")]
ExprPlus,
#[rule("<expr> ::= <expr> Minus <term>")]
ExprMinus,
#[rule("<expr> ::= <term>")]
ExprTerm,
Expr,
#[rule("<term> ::= <term> Mul <num>")]
TermMul,
#[rule("<term> ::= <term> Div <num>")]
TermDiv,
#[rule("<term> ::= <num>")]
TermNum,
Term,
#[rule("<num> ::= BracketL <expr> BracketR")]
NestedNum,
#[rule("<num> ::= Num")]
Num,
}
Expand Down
Loading