diff --git a/Cargo.lock b/Cargo.lock index 7768a14..3fa6113 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -19,7 +19,7 @@ checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" [[package]] name = "copager" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager", @@ -32,22 +32,32 @@ dependencies = [ "copager_lex", "copager_lex_regex", "copager_parse", - "copager_parse_lr1", + "copager_parse_common", + "copager_parse_lr_common", + "copager_parse_lr_lalr1", + "copager_parse_lr_lr0", + "copager_parse_lr_lr1", + "copager_parse_lr_slr1", + "example_lang_arithmetic", + "example_lang_json", + "example_lang_pl0", + "example_lang_xml", "serde", "serde_json", ] [[package]] name = "copager_cfg" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", + "serde", "thiserror", ] [[package]] name = "copager_core" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", @@ -57,7 +67,7 @@ dependencies = [ "copager_lex", "copager_lex_regex", "copager_parse", - "copager_parse_lr1", + "copager_parse_lr_lr1", "copager_utils", "serde", "serde_cbor", @@ -66,7 +76,7 @@ dependencies = [ [[package]] name = "copager_core_macros" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "proc-macro2", @@ -77,7 +87,7 @@ dependencies = [ [[package]] name = "copager_ir" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", @@ -88,22 +98,23 @@ dependencies = [ [[package]] name = "copager_ir_sexp" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", + "copager_core", "copager_ir", "copager_ir_sexp", "copager_lex", "copager_lex_regex", "copager_parse", - "copager_parse_lr1", + "copager_parse_lr_lr1", "thiserror", ] [[package]] name = "copager_ir_void" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", @@ -115,7 +126,7 @@ dependencies = [ [[package]] name = "copager_lex" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", @@ -126,7 +137,7 @@ dependencies = [ [[package]] name = "copager_lex_derive" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", @@ -139,7 +150,7 @@ dependencies = [ [[package]] name = "copager_lex_regex" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", @@ -153,7 +164,7 @@ dependencies = [ [[package]] name = "copager_parse" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", @@ -163,9 +174,20 @@ dependencies = [ "thiserror", ] +[[package]] +name = "copager_parse_common" +version = "0.3.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "copager_parse", + "thiserror", +] + [[package]] name = "copager_parse_derive" -version = "0.2.0" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", @@ -178,39 +200,100 @@ dependencies = [ ] [[package]] -name = "copager_parse_lr1" -version = "0.2.0" +name = "copager_parse_lr_common" +version = "0.3.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_core", + "copager_parse", + "copager_parse_common", + "serde", + "thiserror", +] + +[[package]] +name = "copager_parse_lr_lalr1" +version = "0.3.0" dependencies = [ "anyhow", "copager_cfg", "copager_core", + "copager_ir_void", "copager_lex", "copager_lex_regex", "copager_parse", - "copager_parse_lr1", + "copager_parse_common", + "copager_parse_lr_common", 
"copager_utils", - "itertools", "serde", "thiserror", ] [[package]] -name = "copager_utils" -version = "0.1.1" +name = "copager_parse_lr_lr0" +version = "0.3.0" dependencies = [ "anyhow", + "copager_cfg", + "copager_core", + "copager_ir_void", + "copager_lex", + "copager_lex_regex", + "copager_parse", + "copager_parse_lr_common", + "copager_utils", "serde", "thiserror", ] [[package]] -name = "either" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +name = "copager_parse_lr_lr1" +version = "0.3.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_core", + "copager_ir_void", + "copager_lex", + "copager_lex_regex", + "copager_parse", + "copager_parse_common", + "copager_parse_lr_common", + "copager_utils", + "serde", + "thiserror", +] [[package]] -name = "example_oneshot" +name = "copager_parse_lr_slr1" +version = "0.3.0" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_core", + "copager_ir_void", + "copager_lex", + "copager_lex_regex", + "copager_parse", + "copager_parse_common", + "copager_parse_lr_common", + "copager_utils", + "serde", + "thiserror", +] + +[[package]] +name = "copager_utils" +version = "0.3.0" +dependencies = [ + "anyhow", + "serde", + "thiserror", +] + +[[package]] +name = "example_build_oneshot" version = "0.1.0" dependencies = [ "anyhow", @@ -219,18 +302,54 @@ dependencies = [ ] [[package]] -name = "example_prebuild" +name = "example_build_prebuild" version = "0.1.0" dependencies = [ "anyhow", "copager", - "example_prebuild_grammar", + "example_prebuild_language", "serde", "thiserror", ] [[package]] -name = "example_prebuild_grammar" +name = "example_lang_arithmetic" +version = "0.1.0" +dependencies = [ + "anyhow", + "copager", + "thiserror", +] + +[[package]] +name = "example_lang_json" +version = "0.1.0" +dependencies = [ + "anyhow", + "copager", + "thiserror", +] + +[[package]] +name = "example_lang_pl0" +version = "0.1.0" +dependencies = [ + "anyhow", + "copager", + "thiserror", +] + +[[package]] +name = "example_lang_xml" +version = "0.1.0" +dependencies = [ + "anyhow", + "copager", + "thiserror", +] + +[[package]] +name = "example_prebuild_language" version = "0.1.0" dependencies = [ "anyhow", @@ -245,15 +364,6 @@ version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itoa" version = "1.0.11" diff --git a/Cargo.toml b/Cargo.toml index 61469d6..c87e0e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ cargo-features = ["edition2024"] [package] name = "copager" -version = "0.2.0" +version = "0.3.0" edition = "2024" [dependencies] @@ -13,7 +13,12 @@ copager_cfg = { path = "./crates/cfg" } copager_lex = { path = "./crates/lex", optional = true } copager_lex_regex = { path = "./crates/lex_regex", optional = true } copager_parse = { path = "./crates/parse", optional = true } -copager_parse_lr1 = { path = "./crates/parse_lr1", optional = true } +copager_parse_common = { path = "./crates/parse_common", optional = true } +copager_parse_lr_common = { path = "./crates/parse_lr_common", optional = true } +copager_parse_lr_lr0 = { path = "./crates/parse_lr_lr0", optional = true } 
+copager_parse_lr_lr1 = { path = "./crates/parse_lr_lr1", optional = true } +copager_parse_lr_slr1 = { path = "./crates/parse_lr_slr1", optional = true } +copager_parse_lr_lalr1 = { path = "./crates/parse_lr_lalr1", optional = true } copager_ir = { path = "./crates/ir" } copager_ir_void = { path = "./crates/ir_void", optional = true } copager_ir_sexp = { path = "./crates/ir_sexp", optional = true } @@ -23,19 +28,34 @@ anyhow = { workspace = true } serde = { workspace = true } serde_json = "1.0.117" copager = { path = ".", features = ["all"] } +example_lang_arithmetic = { path = "./examples/lang_arithmetic" } +example_lang_json = { path = "./examples/lang_json" } +example_lang_pl0 = { path = "./examples/lang_pl0" } +example_lang_xml = { path = "./examples/lang_xml" } [features] +# all +all = [ + "prebuild", "derive", "dev", # common + "regexlex", # lex + "lr0", "lr1", "slr1", "lalr1", # parse + "void", "sexp" # ir +] + # common default = ["dep:copager_lex", "dep:copager_parse"] -all = ["prebuild", "derive", "regexlex", "lr1", "void", "sexp"] prebuild = ["dep:serde_json"] derive = ["copager_lex/derive", "copager_parse/derive"] +dev = ["dep:copager_parse_common", "dep:copager_parse_lr_common"] # lex regexlex = ["dep:copager_lex_regex"] # parse -lr1 = ["dep:copager_parse_lr1"] +lr0 = ["dep:copager_parse_lr_lr0"] +lr1 = ["dep:copager_parse_lr_lr1"] +slr1 = ["dep:copager_parse_lr_slr1"] +lalr1 = ["dep:copager_parse_lr_lalr1"] # ir void = ["dep:copager_ir_void"] @@ -52,16 +72,25 @@ members = [ "./crates/lex_derive", "./crates/lex_regex", "./crates/parse", + "./crates/parse_common", "./crates/parse_derive", - "./crates/parse_lr1", + "./crates/parse_lr_common", + "./crates/parse_lr_lr0", + "./crates/parse_lr_lr1", + "./crates/parse_lr_slr1", + "./crates/parse_lr_lalr1", "./crates/ir", "./crates/ir_void", "./crates/ir_sexp", "./crates/utils", # Examples - "./examples/oneshot", - "./examples/prebuild", + "./examples/build_oneshot", + "./examples/build_prebuild", + "./examples/lang_arithmetic", + "./examples/lang_json", + "./examples/lang_pl0", + "./examples/lang_xml", ] exclude = [] @@ -69,3 +98,19 @@ exclude = [] anyhow = "1.0.82" thiserror = "1.0.58" serde = { version = "1.0.197", features = ["derive"] } + +[[test]] +name = "test_by_arithmetic" +path = "./tests/arithmetic/test.rs" + +[[test]] +name = "test_by_json" +path = "./tests/json/test.rs" + +[[test]] +name = "test_by_pl0" +path = "./tests/pl0/test.rs" + +[[test]] +name = "test_by_xml" +path = "./tests/xml/test.rs" diff --git a/README.md b/README.md index 0c40792..7db2b92 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Copager -Rust製パーサジェネレータ +「言語処理系生成系」の生成系(**Constructible** **Pa**rser **Ge**nerator on **R**ust) ## Features @@ -9,6 +9,7 @@ Rust製パーサジェネレータ - `all` - `derive` - `prebuild` +- `dev` ### Lex @@ -16,29 +17,42 @@ Rust製パーサジェネレータ ### Parse +- `lr0` : [crates/parse_lr0](crates/parse_lr0) - `lr1` : [crates/parse_lr1](crates/parse_lr1) +- `slr1` : [crates/parse_slr1](crates/parse_slr1) +- `lalr1` : [crates/parse_lalr1](crates/parse_lalr1) ### IR - `void` : [crates/ir_void](crates/ir_void) - `sexp` : [crates/ir_sexp](crates/ir_sexp) -## Examples +``` +// RegexLex(lex) + LR1(parse) + SExp(ir) +copager = { ..., features = ["derive", "regexlex", "lr1", "sexp"] } + +// RegexLex(lex) + LALR1(parse) + Void(ir) +copager = { ..., features = ["derive", "regexlex", "lalr1", "void"] } +``` -### One-shot +## Examples -[examples/oneshot](examples/oneshot) +- [example_build_oneshot](examples/build_oneshot) +- 
[example_build_prebuild](examples/build_prebuild) +- [example_lang_arithmetic](examples/lang_arithmetic) +- [example_lang_json](examples/lang_json) +- [example_lang_pl0](examples/lang_pl0) +- [example_lang_xml](examples/lang_xml) ``` -$ echo "10 * (20 + 30)" | cargo run -p example_oneshot -Success : (Expr (Term (Term (Num "10")) "*" (Num "(" (Expr (Expr (Term (Num "20"))) "+" (Term (Num "30"))) ")"))) +$ cargo run -p example_build_oneshot +Example +Input: 10 * 20 + 30 +Success: (Expr (Expr (Term (Term (Num "10")) "*" (Num "20"))) "+" (Term (Num "30"))) ``` -### Pre-build - -[examples/prebuild](examples/prebuild) +## Test ``` -$ echo "10 * (20 + 30)" | cargo run -p example_prebuild -Success : (Expr (Term (Term (Num "10")) "*" (Num "(" (Expr (Expr (Term (Num "20"))) "+" (Term (Num "30"))) ")"))) +$ cargo test ``` diff --git a/crates/cfg/Cargo.toml b/crates/cfg/Cargo.toml index 220fec0..d960f3c 100644 --- a/crates/cfg/Cargo.toml +++ b/crates/cfg/Cargo.toml @@ -1,8 +1,9 @@ [package] name = "copager_cfg" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true, features = ["derive"] } diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 2f1bd24..156bef0 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -1,30 +1,89 @@ -use std::collections::HashMap; -use std::fmt::Debug; +use std::collections::HashSet; +use std::fmt::{Display, Debug}; use std::hash::Hash; +use serde::{Serialize, Deserialize}; + use crate::token::TokenTag; pub trait RuleTag where Self: Debug + Copy + Clone + Hash + Eq, { - fn as_rules(&self) -> Vec>; + fn as_rules(&self) -> Vec>; } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Rule { +#[derive(Clone, Eq, Serialize, Deserialize)] +pub struct Rule +where + T: TokenTag, + R: RuleTag, +{ + #[serde(bound( + serialize = "T: Serialize, R: Serialize", + deserialize = "T: Deserialize<'de>, R: Deserialize<'de>", + ))] pub id: usize, + pub tag: Option, pub lhs: RuleElem, pub rhs: Vec>, } -impl From<(RuleElem, Vec>)> for Rule { - fn from((lhs, rhs): (RuleElem, Vec>)) -> Self { - Rule { id: 0, lhs, rhs } +impl Display for Rule +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} ->", self.lhs)?; + for elem in &self.rhs { + write!(f, " {}", elem)?; + } + write!(f, "") + } +} + +impl Debug for Rule +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} ({})", self, self.id) + } +} + +impl PartialEq for Rule +where + T: TokenTag, + R: RuleTag, +{ + fn eq(&self, other: &Self) -> bool { + self.tag == other.tag && self.lhs == other.lhs && self.rhs == other.rhs + } +} + +impl Hash for Rule +where + T: TokenTag, + R: RuleTag, +{ + fn hash(&self, state: &mut H) { + self.tag.hash(state); + self.lhs.hash(state); + self.rhs.hash(state); } } -impl Rule { +impl Rule +where + T: TokenTag, + R: RuleTag, +{ + pub fn new(tag: Option, lhs: RuleElem, rhs: Vec>) -> Self { + Rule { id: 0, tag, lhs, rhs } + } + pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem> { let mut l_nonterms = vec![&self.lhs]; let r_nonterms: Vec<&RuleElem> = self @@ -44,28 +103,41 @@ impl Rule { } } -#[derive(Debug, Clone, Eq)] +#[derive(Clone, Hash, Eq, Serialize, Deserialize)] pub enum RuleElem { + #[serde(bound( + serialize = "T: Serialize", + deserialize = "T: Deserialize<'de>", + ))] NonTerm(String), Term(T), + Epsilon, EOF, } -impl Hash for 
RuleElem { - fn hash(&self, state: &mut H) { +impl Display for RuleElem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - RuleElem::NonTerm(s) => s.hash(state), - RuleElem::Term(t) => t.hash(state), - RuleElem::EOF => 0.hash(state), + RuleElem::NonTerm(s) => write!(f, "<{}>", s), + RuleElem::Term(t) => write!(f, "{:?}", t.as_str()), + RuleElem::Epsilon => write!(f, "ε"), + RuleElem::EOF => write!(f, "$"), } } } +impl Debug for RuleElem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + impl PartialEq for RuleElem { fn eq(&self, other: &Self) -> bool { match (self, other) { (RuleElem::NonTerm(s1), RuleElem::NonTerm(s2)) => s1 == s2, (RuleElem::Term(t1), RuleElem::Term(t2)) => t1 == t2, + (RuleElem::Epsilon, RuleElem::Epsilon) => true, (RuleElem::EOF, RuleElem::EOF) => true, _ => false, } @@ -83,15 +155,23 @@ impl RuleElem { } #[derive(Debug, Clone)] -pub struct RuleSet { +pub struct RuleSet +where + T: TokenTag, + R: RuleTag, +{ pub top: String, - pub rules: Vec>, + pub rules: Vec>, } -impl FromIterator> for RuleSet { +impl FromIterator> for RuleSet +where + T: TokenTag, + R: RuleTag, +{ fn from_iter(rules: I) -> Self where - I: IntoIterator>, + I: IntoIterator>, { let rules = rules.into_iter().collect::>(); let top = match &rules[0].lhs { @@ -102,299 +182,30 @@ impl FromIterator> for RuleSet { } } -impl RuleSet { - pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem> { +impl RuleSet +where + T: TokenTag, + R: RuleTag, +{ + pub fn update_top(&mut self, rule: Rule) { + if let RuleElem::NonTerm(top) = &rule.lhs { + self.top = top.to_string(); + } + self.rules.push(rule); + } + + pub fn nonterms<'a>(&'a self) -> HashSet<&'a RuleElem> { self.rules.iter().flat_map(|rule| rule.nonterms()).collect() } - pub fn terms<'a>(&'a self) -> Vec<&'a RuleElem> { + pub fn terms<'a>(&'a self) -> HashSet<&'a RuleElem> { self.rules.iter().flat_map(|rule| rule.terms()).collect() } - pub fn find_rule<'a>(&'a self, target: &RuleElem) -> Vec<&'a Rule> { + pub fn find_rule<'a>(&'a self, target: &RuleElem) -> Vec<&'a Rule> { self.rules .iter() .filter(|rule| &rule.lhs == target) .collect() } - - pub fn first_set<'a>(&'a self) -> HashMap<&'a RuleElem, Vec<&'a RuleElem>> { - // 1. Calc a null set - let nulls_set = self.nulls_set(); - - // 2. Initialize a first set - let mut first_set: HashMap<&RuleElem, Vec<&RuleElem>> = HashMap::new(); - first_set.insert(&RuleElem::EOF, vec![&RuleElem::EOF]); - self.terms().into_iter().for_each(|relem| { - first_set.insert(relem, vec![relem]); - }); - self.nonterms().into_iter().for_each(|relem| { - first_set.insert(relem, vec![]); - }); - - // 3. List up candidates from a nonterm set - let mut candidates = vec![]; - for nonterm in self.nonterms() { - let rules = self.find_rule(nonterm); - for rule in rules { - for relem in &rule.rhs { - if &rule.lhs != relem { - candidates.push((nonterm, relem)) - } - if !nulls_set.contains(&relem) { - break; - } - } - } - } - - // 4. Find first set with recursive - let mut updated = true; - while updated { - updated = false; - for (nonterm, candidate) in &candidates { - let found_elems: Vec<&RuleElem> = first_set - .get(candidate) - .unwrap() - .iter() - .filter(|relem| !first_set.get(nonterm).unwrap().contains(relem)) - .copied() - .collect(); - updated = !found_elems.is_empty(); - first_set - .get_mut(nonterm) - .unwrap() - .extend(found_elems.into_iter()); - } - } - - first_set - } - - fn nulls_set<'a>(&'a self) -> Vec<&'a RuleElem> { - // 1. 
Find null rules - let mut nulls_set: Vec<&RuleElem> = self - .rules - .iter() - .filter(|rule| rule.rhs.is_empty()) - .map(|rule| &rule.lhs) - .collect(); - - // 2. Find null rules with recursive - let mut updated = true; - while updated { - updated = false; - for rule in &self.rules { - if nulls_set.contains(&&rule.lhs) { - continue; - } else if rule.rhs.iter().all(|relem| nulls_set.contains(&relem)) { - nulls_set.push(&rule.lhs); - updated = true; - } else { - continue; - } - } - } - - nulls_set - } } - -// #[cfg(test)] -// mod test { -// use std::collections::HashMap; - -// use crate::token::TokenTag; -// use crate::RuleKind; - -// use super::{Rule, RuleElem}; - -// #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] -// enum TestToken { -// Num, -// Plus, -// Minus, -// Mul, -// Div, -// BracketA, -// BracketB, -// } - -// impl TokenKind<'_> for TestToken { -// fn as_str(&self) -> &'static str { -// match self { -// TestToken::Num => r"^[1-9][0-9]*", -// TestToken::Plus => r"^\+", -// TestToken::Minus => r"^-", -// TestToken::Mul => r"^\*", -// TestToken::Div => r"^/", -// TestToken::BracketA => r"^\(", -// TestToken::BracketB => r"^\)", -// } -// } - -// fn ignore_str() -> &'static str { -// r"^[ \t\n]+" -// } - -// fn into_iter() -> impl Iterator { -// vec![ -// TestToken::Num, -// TestToken::Plus, -// TestToken::Minus, -// TestToken::Mul, -// TestToken::Div, -// TestToken::BracketA, -// TestToken::BracketB, -// ] -// .into_iter() -// } -// } - -// #[derive(Debug, Clone, Hash, PartialEq, Eq)] -// enum TestRule { -// ExprPlus, -// ExprMinus, -// Expr2Term, -// TermMul, -// TermDiv, -// Term2Fact, -// Fact2Expr, -// Fact2Num, -// } - -// impl<'a> RuleKind<'a> for TestRule { -// type TokenKind = TestToken; - -// fn into_iter() -> impl Iterator { -// Box::new( -// vec![ -// TestRule::ExprPlus, -// TestRule::ExprMinus, -// TestRule::Expr2Term, -// TestRule::TermMul, -// TestRule::TermDiv, -// TestRule::Term2Fact, -// TestRule::Fact2Expr, -// TestRule::Fact2Num, -// ] -// .into_iter(), -// ) -// } - -// fn into_rules(&self) -> Vec> { -// let expr_plus = Rule::from(( -// RuleElem::new_nonterm("expr"), -// vec![ -// RuleElem::new_nonterm("expr"), -// RuleElem::new_term(TestToken::Plus), -// RuleElem::new_nonterm("term"), -// ], -// )); - -// let expr_minus = Rule::from(( -// RuleElem::new_nonterm("expr"), -// vec![ -// RuleElem::new_nonterm("expr"), -// RuleElem::new_term(TestToken::Minus), -// RuleElem::new_nonterm("term"), -// ], -// )); - -// let expr_2_term = Rule::::from(( -// RuleElem::new_nonterm("expr"), -// vec![RuleElem::new_nonterm("term")], -// )); - -// let term_mul = Rule::from(( -// RuleElem::new_nonterm("term"), -// vec![ -// RuleElem::new_nonterm("term"), -// RuleElem::new_term(TestToken::Mul), -// RuleElem::new_nonterm("fact"), -// ], -// )); - -// let term_div = Rule::from(( -// RuleElem::new_nonterm("term"), -// vec![ -// RuleElem::new_nonterm("term"), -// RuleElem::new_term(TestToken::Div), -// RuleElem::new_nonterm("fact"), -// ], -// )); - -// let term_2_fact = Rule::::from(( -// RuleElem::new_nonterm("term"), -// vec![RuleElem::new_nonterm("fact")], -// )); - -// let fact_2_expr = Rule::from(( -// RuleElem::new_nonterm("fact"), -// vec![ -// RuleElem::new_term(TestToken::BracketA), -// RuleElem::new_nonterm("expr"), -// RuleElem::new_term(TestToken::BracketB), -// ], -// )); - -// let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![])); - -// match self { -// TestRule::ExprPlus => vec![expr_plus], -// TestRule::ExprMinus => vec![expr_minus], -// 
TestRule::Expr2Term => vec![expr_2_term], -// TestRule::TermMul => vec![term_mul], -// TestRule::TermDiv => vec![term_div], -// TestRule::Term2Fact => vec![term_2_fact], -// TestRule::Fact2Expr => vec![fact_2_expr], -// TestRule::Fact2Num => vec![fact_2_num], -// } -// } -// } - -// fn check>( -// first_set: &HashMap<&RuleElem, Vec<&RuleElem>>, -// nonterm: T, -// exp_terms: Vec, -// ) { -// let nonterms = RuleElem::::new_nonterm(nonterm); -// let exp_terms: Vec> = exp_terms -// .into_iter() -// .map(|term| RuleElem::new_term(term)) -// .collect(); -// assert!(first_set.get(&nonterms).unwrap().len() == exp_terms.len()); - -// let result = first_set -// .get(&nonterms) -// .unwrap() -// .into_iter() -// .zip(exp_terms.into_iter()) -// .any(|(a, b)| a == &&b); -// assert!(result); -// } - -// #[test] -// fn first_set() { -// let ruleset = ::into_ruleset(); -// let first_set = ruleset.first_set(); - -// check( -// &first_set, -// "expr", -// vec![ -// TestToken::Plus, -// TestToken::Minus, -// TestToken::Mul, -// TestToken::Div, -// TestToken::BracketA, -// ], -// ); -// check( -// &first_set, -// "term", -// vec![TestToken::Mul, TestToken::Div, TestToken::BracketA], -// ); -// check(&first_set, "fact", vec![TestToken::BracketA]); -// } -// } diff --git a/crates/cfg/src/token.rs b/crates/cfg/src/token.rs index 1469f80..8e47435 100644 --- a/crates/cfg/src/token.rs +++ b/crates/cfg/src/token.rs @@ -1,6 +1,8 @@ use std::fmt::Debug; use std::hash::Hash; +use serde::{Serialize, Deserialize}; + pub trait TokenTag where Self: Debug + Copy + Clone + Hash + Eq, @@ -8,7 +10,7 @@ where fn as_str<'a, 'b>(&'a self) -> &'b str; } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] pub struct Token<'input, T: TokenTag> { pub kind: T, pub src: &'input str, diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 6d8bf34..e11227e 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "copager_core" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] @@ -19,5 +19,5 @@ copager_core = { path = "." 
} copager_lex = { path = "../lex", features = ["derive"]} copager_lex_regex = { path = "../lex_regex" } copager_parse = { path = "../parse", features = ["derive"] } -copager_parse_lr1 = { path = "../parse_lr1" } +copager_parse_lr_lr1 = { path = "../parse_lr_lr1" } copager_ir_void = { path = "../ir_void" } diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index a7571f7..7577eb5 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -6,17 +6,17 @@ use serde::{Serialize, Deserialize}; use serde_cbor::ser::to_vec_packed; use serde_cbor::de::from_slice; -use copager_lex::{LexSource, LexDriver}; -use copager_parse::{ParseSource, ParseDriver, ParseEvent}; +use copager_lex::{LexSource, BaseLexer}; +use copager_parse::{ParseSource, BaseParser, ParseEvent}; use copager_ir::{IR, IRBuilder}; use copager_utils::cache::Cacheable; -pub trait GrammarDesign { +pub trait LanguageDesign { type Lex: LexSource; type Parse: ParseSource<::Tag>; } -pub struct Grammar +pub struct Language where Sl: LexSource, Sp: ParseSource, @@ -25,7 +25,7 @@ where _phantom_sp: PhantomData, } -impl GrammarDesign for Grammar +impl LanguageDesign for Language where Sl: LexSource, Sp: ParseSource, @@ -37,9 +37,9 @@ where #[derive(Debug, Serialize, Deserialize)] pub struct Processor where - G: GrammarDesign, - Dl: LexDriver, - Dp: ParseDriver, + G: LanguageDesign, + Dl: BaseLexer, + Dp: BaseParser, { // Cache cache_lex: Option>, @@ -62,9 +62,9 @@ where impl Processor where - G: GrammarDesign, - Dl: LexDriver, - Dp: ParseDriver, + G: LanguageDesign, + Dl: BaseLexer, + Dp: BaseParser, { pub fn new() -> Self { Processor { @@ -129,9 +129,9 @@ where impl Processor where - G: GrammarDesign, - Dl: LexDriver + Cacheable, - Dp: ParseDriver, + G: LanguageDesign, + Dl: BaseLexer + Cacheable, + Dp: BaseParser, { pub fn prebuild_lexer(self) -> anyhow::Result where @@ -159,9 +159,9 @@ where impl Processor where - G: GrammarDesign, - Dl: LexDriver, - Dp: ParseDriver + Cacheable<(G::Lex, G::Parse)>, + G: LanguageDesign, + Dl: BaseLexer, + Dp: BaseParser + Cacheable<(G::Lex, G::Parse)>, { pub fn prebuild_parser(self) -> anyhow::Result where diff --git a/crates/core/tests/prebuild.rs b/crates/core/tests/prebuild.rs index af7db01..89aed73 100644 --- a/crates/core/tests/prebuild.rs +++ b/crates/core/tests/prebuild.rs @@ -2,13 +2,13 @@ use serde::{Serialize, Deserialize}; use serde_cbor::ser::to_vec_packed; use serde_cbor::de::from_slice; -use copager_core::{Grammar, Processor}; +use copager_core::{Language, Processor}; use copager_cfg::token::TokenTag; use copager_cfg::rule::{RuleTag, Rule, RuleElem}; use copager_lex::LexSource; use copager_lex_regex::RegexLexer; use copager_parse::ParseSource; -use copager_parse_lr1::LR1; +use copager_parse_lr_lr1::LR1; use copager_ir_void::Void; #[derive( @@ -54,10 +54,10 @@ enum ExprRule { Num, } -type MyGrammar = Grammar; +type MyLanguage = Language; type MyLexer = RegexLexer; type MyParser = LR1; -type MyProcessor = Processor; +type MyProcessor = Processor; #[test] fn prebuild() -> anyhow::Result<()> { diff --git a/crates/core/tests/simple.rs b/crates/core/tests/simple.rs index 4a62fd6..f9322b8 100644 --- a/crates/core/tests/simple.rs +++ b/crates/core/tests/simple.rs @@ -1,12 +1,12 @@ use serde::{Serialize, Deserialize}; -use copager_core::{Grammar, Processor}; +use copager_core::{Language, Processor}; use copager_cfg::token::TokenTag; use copager_cfg::rule::{RuleTag, Rule, RuleElem}; use copager_lex::LexSource; use copager_lex_regex::RegexLexer; use copager_parse::ParseSource; -use 
copager_parse_lr1::LR1; +use copager_parse_lr_lr1::LR1; use copager_ir_void::Void; #[derive( @@ -52,10 +52,10 @@ enum ExprRule { Num, } -type MyGrammar = Grammar; +type MyLanguage = Language; type MyLexer = RegexLexer; type MyParser = LR1; -type MyProcessor = Processor; +type MyProcessor = Processor; #[test] fn simple_success() -> anyhow::Result<()> { diff --git a/crates/core/tests/simple_multiple.rs b/crates/core/tests/simple_multiple.rs index 5e8ebc0..69d6864 100644 --- a/crates/core/tests/simple_multiple.rs +++ b/crates/core/tests/simple_multiple.rs @@ -1,12 +1,12 @@ use serde::{Serialize, Deserialize}; -use copager_core::{Grammar, Processor}; +use copager_core::{Language, Processor}; use copager_cfg::token::TokenTag; use copager_cfg::rule::{RuleTag, Rule, RuleElem}; use copager_lex::LexSource; use copager_lex_regex::RegexLexer; use copager_parse::ParseSource; -use copager_parse_lr1::LR1; +use copager_parse_lr_lr1::LR1; use copager_ir_void::Void; #[derive( @@ -52,10 +52,10 @@ enum ExprRule { Num, } -type MyGrammar = Grammar; +type MyLanguage = Language; type MyLexer = RegexLexer; type MyParser = LR1; -type MyProcessor = Processor; +type MyProcessor = Processor; const OK_INPUTS: [&str; 7] = [ "1 + 2", diff --git a/crates/core_macros/Cargo.toml b/crates/core_macros/Cargo.toml index d33518c..256afa0 100644 --- a/crates/core_macros/Cargo.toml +++ b/crates/core_macros/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "copager_core_macros" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] diff --git a/crates/ir/Cargo.toml b/crates/ir/Cargo.toml index c05bab9..9851034 100644 --- a/crates/ir/Cargo.toml +++ b/crates/ir/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "copager_ir" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] diff --git a/crates/ir_sexp/Cargo.toml b/crates/ir_sexp/Cargo.toml index b3e15ca..983cc78 100644 --- a/crates/ir_sexp/Cargo.toml +++ b/crates/ir_sexp/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "copager_ir_sexp" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] @@ -12,8 +12,9 @@ copager_parse = { path = "../parse" } copager_ir = { path = "../ir" } [dev-dependencies] +copager_core = { path = "../core" } copager_lex = { path = "../lex", features = ["derive"] } copager_lex_regex = { path = "../lex_regex" } copager_parse = { path = "../parse", features = ["derive"] } -copager_parse_lr1 = { path = "../parse_lr1" } +copager_parse_lr_lr1 = { path = "../parse_lr_lr1" } copager_ir_sexp = { path = "." 
} diff --git a/crates/ir_sexp/tests/simple.rs b/crates/ir_sexp/tests/simple.rs index 0f42f78..c42729c 100644 --- a/crates/ir_sexp/tests/simple.rs +++ b/crates/ir_sexp/tests/simple.rs @@ -1,10 +1,10 @@ +use copager_core::{Language, Processor}; use copager_cfg::token::TokenTag; use copager_cfg::rule::{RuleTag, Rule, RuleElem}; -use copager_lex::{LexSource, LexDriver}; +use copager_lex::LexSource; use copager_lex_regex::RegexLexer; -use copager_parse::{ParseSource, ParseDriver, ParseEvent}; -use copager_parse_lr1::LR1; -use copager_ir::{IR, IRBuilder}; +use copager_parse::ParseSource; +use copager_parse_lr_lr1::LR1; use copager_ir_sexp::SExp; #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] @@ -44,10 +44,6 @@ enum ExprRule { Num, } -type MyLexer = RegexLexer; -type MyParser = LR1; -type MyIR = SExp<'static, ExprToken, ExprRule>; - #[test] fn simple_display() { let ir = parse("1"); @@ -68,28 +64,15 @@ fn simple_eval() { } fn parse<'input>(input: &'input str) -> anyhow::Result> { - let source = ExprToken::default(); - let lexer = >::try_from(source).unwrap(); - - let source = (ExprToken::default(), ExprRule::default()); - let parser = >::try_from(source).unwrap(); - - let mut ir_builder = >::Builder::new(); - for event in parser.run(lexer.run(input)) { - match event { - ParseEvent::Read(token) => { - ir_builder.on_read(token).unwrap(); - } - ParseEvent::Parse { rule, len } => { - ir_builder.on_parse(rule, len).unwrap(); - } - ParseEvent::Err(err) => { - return Err(anyhow::anyhow!("{:?}", err)); - } - } - } + type TestLang = Language; + type TestLexer = RegexLexer; + type TestParser = LR1; + type TestProcessor = Processor; - ir_builder.build() + TestProcessor::new() + .build_lexer()? + .build_parser()? + .process::>(input) } fn eval(ir: &SExp<'static, ExprToken, ExprRule>) -> i32 { diff --git a/crates/ir_void/Cargo.toml b/crates/ir_void/Cargo.toml index fcc2038..9889ab1 100644 --- a/crates/ir_void/Cargo.toml +++ b/crates/ir_void/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "copager_ir_void" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] diff --git a/crates/lex/Cargo.toml b/crates/lex/Cargo.toml index df17694..d5bc620 100644 --- a/crates/lex/Cargo.toml +++ b/crates/lex/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "copager_lex" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs index 76bd97f..e7bf1fd 100644 --- a/crates/lex/src/lib.rs +++ b/crates/lex/src/lib.rs @@ -9,7 +9,7 @@ pub trait LexSource { fn iter(&self) -> impl Iterator; } -pub trait LexDriver +pub trait BaseLexer where Self: Sized, S: LexSource, diff --git a/crates/lex_derive/Cargo.toml b/crates/lex_derive/Cargo.toml index 6c9cabd..1d2f9ba 100644 --- a/crates/lex_derive/Cargo.toml +++ b/crates/lex_derive/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "copager_lex_derive" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] diff --git a/crates/lex_regex/Cargo.toml b/crates/lex_regex/Cargo.toml index 5e07b94..58e0284 100644 --- a/crates/lex_regex/Cargo.toml +++ b/crates/lex_regex/Cargo.toml @@ -2,7 +2,7 @@ cargo-features = ["edition2024"] [package] name = "copager_lex_regex" -version = "0.2.0" +version = "0.3.0" edition = "2024" [dependencies] diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index 2e22254..18982e9 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -1,20 +1,18 @@ #![feature(gen_blocks)] -use std::rc::Rc; - use regex::{Regex, RegexSet}; use 
copager_cfg::token::{TokenTag, Token}; -use copager_lex::{LexSource, LexDriver}; +use copager_lex::{LexSource, BaseLexer}; #[derive(Debug)] pub struct RegexLexer { - regex_istr: Rc, - regex_set: Rc, - regex_map: Rc>, + regex_istr: Regex, + regex_set: RegexSet, + regex_map: Vec<(Regex, S::Tag)>, } -impl LexDriver for RegexLexer { +impl BaseLexer for RegexLexer { fn try_from(source: S) -> anyhow::Result { let regex_istr = Regex::new(source.ignore_token())?; let regex_set = source.iter() @@ -26,9 +24,9 @@ impl LexDriver for RegexLexer { .collect::>>()?; Ok(RegexLexer { - regex_istr: Rc::new(regex_istr), - regex_set: Rc::new(regex_set), - regex_map: Rc::new(regex_map), + regex_istr, + regex_set, + regex_map, }) } @@ -45,24 +43,23 @@ impl LexDriver for RegexLexer { }; // Find the token - let mut matches = self + let matched = self .regex_set .matches(remain) .into_iter() .map(|idx| &self.regex_map[idx]) .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str())) - .collect::>(); - matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); + .next(); // Update pos - let (token, acc_s) = match matches.first() { + let (token, acc_s) = match matched { Some(a) => a, None => return, }; let range = (pos, pos + acc_s.len()); pos += acc_s.len(); - yield Token::new(*token, &input, range); + yield Token::new(token, &input, range); } } } diff --git a/crates/lex_regex/tests/simple.rs b/crates/lex_regex/tests/simple.rs index 6a15694..86ee5d3 100644 --- a/crates/lex_regex/tests/simple.rs +++ b/crates/lex_regex/tests/simple.rs @@ -1,5 +1,5 @@ use copager_cfg::token::{TokenTag, Token}; -use copager_lex::{LexSource, LexDriver}; +use copager_lex::{LexSource, BaseLexer}; use copager_lex_regex::RegexLexer; #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] @@ -28,7 +28,7 @@ type MyLexer = RegexLexer; #[test] fn simple_success() { let source = ExprToken::default(); - let lexer = >::try_from(source).unwrap(); + let lexer = >::try_from(source).unwrap(); let mut lexer = lexer.run("1 + 2 * 3"); assert_eq_token(lexer.next(), "1"); assert_eq_token(lexer.next(), "+"); @@ -42,7 +42,7 @@ fn simple_success() { #[should_panic] fn simple_failed() { let source = ExprToken::default(); - let lexer = >::try_from(source).unwrap(); + let lexer = >::try_from(source).unwrap(); let mut lexer = lexer.run("1 + 2 * stop 3"); assert_eq_token(lexer.next(), "1"); assert_eq_token(lexer.next(), "+"); diff --git a/crates/parse/Cargo.toml b/crates/parse/Cargo.toml index 7863532..f4c33f0 100644 --- a/crates/parse/Cargo.toml +++ b/crates/parse/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "copager_parse" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 760f467..83a0cd9 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -9,7 +9,7 @@ pub trait ParseSource { fn iter(&self) -> impl Iterator; - fn into_ruleset(&self) -> RuleSet { + fn into_ruleset(&self) -> RuleSet { let set_id_for_all = |(id, tag): (usize, Self::Tag)| { tag.as_rules() .into_iter() @@ -22,11 +22,11 @@ pub trait ParseSource { self.iter() .enumerate() .flat_map(set_id_for_all) - .collect::>() + .collect::>() } } -pub trait ParseDriver +pub trait BaseParser where Self: Sized, Sl: LexSource, diff --git a/crates/parse_common/Cargo.toml b/crates/parse_common/Cargo.toml new file mode 100644 index 0000000..7f261cf --- /dev/null +++ b/crates/parse_common/Cargo.toml @@ -0,0 +1,15 @@ +cargo-features = ["edition2024"] + +[package] +name = "copager_parse_common" 
+version = "0.3.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } + +[dev-dependencies] +copager_lex = { path = "../lex", features = ["derive"] } +copager_parse = { path = "../parse", features = ["derive"] } diff --git a/crates/parse_common/src/lib.rs b/crates/parse_common/src/lib.rs new file mode 100644 index 0000000..90d8760 --- /dev/null +++ b/crates/parse_common/src/lib.rs @@ -0,0 +1 @@ +pub mod rule; diff --git a/crates/parse_common/src/rule.rs b/crates/parse_common/src/rule.rs new file mode 100644 index 0000000..6eb0269 --- /dev/null +++ b/crates/parse_common/src/rule.rs @@ -0,0 +1,7 @@ +mod first; +mod follow; +mod director; + +pub use first::FirstSet; +pub use follow::FollowSet; +pub use director::DirectorSet; diff --git a/crates/parse_common/src/rule/director.rs b/crates/parse_common/src/rule/director.rs new file mode 100644 index 0000000..a80f7d0 --- /dev/null +++ b/crates/parse_common/src/rule/director.rs @@ -0,0 +1,178 @@ +use std::collections::{HashMap, HashSet}; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; + +use crate::rule::{FirstSet, FollowSet}; + +pub struct DirectorSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + map: HashMap<&'a Rule, Vec<&'a RuleElem>>, +} + +impl<'a, T, R> From<&'a RuleSet> for DirectorSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { + let build = DirectorSetBuilder::from(ruleset).calc(); + let map = build.map + .into_iter() + .map(|(k, v)| (k, v.into_iter().collect())) + .collect(); + + DirectorSet { map } + } +} + +impl <'a, T, R> DirectorSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn get(&self, rule: &Rule) -> Option<&[&'a RuleElem]> { + self.map.get(rule).map(|elems| elems.as_slice()) + } +} + +struct DirectorSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + map: HashMap<&'a Rule, HashSet<&'a RuleElem>>, + ruleset: &'a RuleSet, + first_set: FirstSet<'a, T, R>, + follow_set: FollowSet<'a, T, R>, +} + +impl<'a, T, R> From<&'a RuleSet> for DirectorSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { + let first_set = FirstSet::from(ruleset); + let follow_set = FollowSet::from(ruleset); + + DirectorSetBuilder { + map: HashMap::new(), + ruleset, + first_set, + follow_set, + } + } +} + +impl<'a, T, R> DirectorSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn calc(mut self) -> Self { + for rule in &self.ruleset.rules { + self.calc_once(rule); + } + self + } + + fn calc_once(&mut self, rule: &'a Rule) { + let lhs = match &rule.lhs { + RuleElem::NonTerm(s) => s.as_str(), + _ => unreachable!(), + }; + + let rhs_firsts = self.first_set.get_by(&rule.rhs).to_vec(); + let cand_elems = if !rhs_firsts.contains(&&RuleElem::Epsilon) { + rhs_firsts + } else { + let mut cand_elems = rhs_firsts.to_vec(); + cand_elems.extend_from_slice(self.follow_set.get(&lhs).unwrap()); + cand_elems + }; + + let director_elems = cand_elems + .into_iter() + .filter(|&e| *e != RuleElem::Epsilon) + .collect(); + self.map.insert(rule, director_elems); + } +} + +#[cfg(test)] +mod test { + use copager_cfg::token::TokenTag; + use copager_cfg::rule::{Rule, RuleTag, RuleElem}; + use copager_lex::LexSource; + use copager_parse::ParseSource; + + use super::DirectorSet; + + #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)] + enum TestToken { + #[token(r"a")] + A, + #[token(r"b")] + B, + } + + 
#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] + enum TestRule { + #[default] + #[rule(" ::= ")] + S, + #[rule(" ::= A")] + A, + #[rule(" ::= B")] + B, + #[rule(" ::= ")] + C, + } + + fn eq_symbols(lhs: &[&RuleElem], rhs: &[RuleElem]) -> bool + where + T: TokenTag, + { + if lhs.len() != rhs.len() { + println!("lhs: {:?}, rhs: {:?}", lhs, rhs); + return false; + } + for lelem in lhs { + if !rhs.contains(lelem) { + println!("lhs: {:?}, rhs: {:?}", lhs, rhs); + return false; + } + } + return true; + } + + #[test] + fn follow_set() { + macro_rules! term { + ($expr:ident) => { RuleElem::new_term(TestToken::$expr) }; + } + + let ruleset = TestRule::default().into_ruleset(); + let director_set = DirectorSet::from(&ruleset); + + let rule = &TestRule::S.as_rules()[0]; + let expected = vec![term!(A)]; + assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice())); + + let rule = &TestRule::A.as_rules()[0]; + let expected = vec![term!(A)]; + assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice())); + + let rule = &TestRule::B.as_rules()[0]; + let expected = vec![term!(A)]; + assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice())); + + let rule = &TestRule::C.as_rules()[0]; + let expected = vec![]; + assert!(eq_symbols(director_set.get(rule).unwrap(), expected.as_slice())); + } +} diff --git a/crates/parse_common/src/rule/first.rs b/crates/parse_common/src/rule/first.rs new file mode 100644 index 0000000..600daf4 --- /dev/null +++ b/crates/parse_common/src/rule/first.rs @@ -0,0 +1,211 @@ +use std::collections::{HashMap, HashSet}; +use std::marker::PhantomData; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleElem, RuleSet, RuleTag}; + +pub struct FirstSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + map: HashMap<&'a RuleElem, Vec<&'a RuleElem>>, + _phantom: PhantomData, +} + +impl<'a, T, R> From<&'a RuleSet> for FirstSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { + let build = FirstSetBuilder::from(ruleset).expand(); + let map = build.map + .into_iter() + .map(|(k, v)| (k, v.into_iter().collect())) + .collect(); + + FirstSet { + map, + _phantom: PhantomData, + } + } +} + +impl<'a, T, R> FirstSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn get(&self, relem: &RuleElem) -> Option<&[&'a RuleElem]> { + self.map.get(relem).map(|terms| terms.as_slice()) + } + + pub fn get_by(&self, relems: &[RuleElem]) -> Vec<&'a RuleElem> { + if relems.is_empty() { + vec![&RuleElem::EOF] + } else { + let mut firsts: HashSet<&'a RuleElem> = HashSet::new(); + for relem in relems { + let first_candidates = self.map.get(relem).unwrap(); + firsts.extend(first_candidates); + if firsts.contains(&RuleElem::Epsilon) { + firsts.remove(&RuleElem::Epsilon); + continue + } + return firsts.into_iter().collect(); + } + firsts.insert(&RuleElem::EOF); + firsts.into_iter().collect() + } + } +} + +struct FirstSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + map: HashMap<&'a RuleElem, HashSet<&'a RuleElem>>, + ruleset: &'a RuleSet, + nonterms: Vec<&'a RuleElem>, +} + +impl<'a, T, R> From<&'a RuleSet> for FirstSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { + let mut map = HashMap::new(); + ruleset.nonterms().iter().for_each(|&nonterm| { + map.insert(nonterm, HashSet::new()); + }); + ruleset.terms().iter().for_each(|&term| { + map.insert(term, HashSet::new()); + map.get_mut(term).unwrap().insert(term); + }); + 
map.insert(&RuleElem::EOF, HashSet::new()); + map.get_mut(&RuleElem::EOF).unwrap().insert(&RuleElem::EOF); + + let nonterms = ruleset.nonterms().into_iter().collect(); + + FirstSetBuilder { + map, + ruleset, + nonterms, + } + } +} + +impl<'a, T, R> FirstSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn expand(mut self) -> Self { + while self.expand_child() {} + self + } + + fn expand_child(&mut self) -> bool { + let mut modified = false; + for &nonterm in &self.nonterms { + let old_len = self.map.get(nonterm).unwrap().len(); + for first_symbol in rhs_first_symbol(self.ruleset, nonterm) { + if matches!(first_symbol, RuleElem::NonTerm(_)) { + let cand_terms = self.map.get(first_symbol).unwrap().clone(); + self.map.get_mut(nonterm).unwrap().extend(cand_terms); + } else { + self.map.get_mut(nonterm).unwrap().insert(first_symbol); + } + } + modified |= old_len != self.map.get(nonterm).unwrap().len(); + } + modified + } +} + +fn rhs_first_symbol<'a, T, R>(ruleset: &'a RuleSet, nonterm: &RuleElem) -> impl Iterator> +where + T: TokenTag, + R: RuleTag, +{ + ruleset.rules + .iter() + .filter(move |&rule| &rule.lhs == nonterm) + .flat_map(|rule| rule.rhs.first()) +} + +#[cfg(test)] +mod test { + use copager_cfg::token::TokenTag; + use copager_cfg::rule::{Rule, RuleTag, RuleElem}; + use copager_lex::LexSource; + use copager_parse::ParseSource; + + use super::FirstSet; + + #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)] + enum TestToken { + #[token(r"a")] + A, + #[token(r"b")] + B, + } + + #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] + enum TestRule { + #[default] + #[rule(" ::= ")] + S, + #[rule(" ::= A")] + A, + #[rule(" ::= B")] + B, + #[rule(" ::= ")] + C, + } + + fn eq_symbols(lhs: &[&RuleElem], rhs: &[RuleElem]) -> bool + where + T: TokenTag, + { + if lhs.len() != rhs.len() { + println!("lhs: {:?}, rhs: {:?}", lhs, rhs); + return false; + } + for lelem in lhs { + if !rhs.contains(lelem) { + println!("lhs: {:?}, rhs: {:?}", lhs, rhs); + return false; + } + } + return true; + } + + #[test] + fn first_set() { + macro_rules! term { + ($ident:ident) => { RuleElem::new_term(TestToken::$ident) }; + } + macro_rules! 
nonterm { + ($expr:expr) => { RuleElem::new_nonterm($expr) }; + } + + let ruleset = TestRule::default().into_ruleset(); + let first_set = FirstSet::from(&ruleset); + + let expected = vec![term!(A)]; + assert!(eq_symbols(first_set.get(&nonterm!("S")).unwrap(), expected.as_slice())); + + let expected = vec![term!(A)]; + assert!(eq_symbols(first_set.get(&nonterm!("A")).unwrap(), expected.as_slice())); + + let expected = vec![term!(A)]; + assert!(eq_symbols(first_set.get(&nonterm!("B")).unwrap(), expected.as_slice())); + + let expected = vec![RuleElem::Epsilon]; + assert!(eq_symbols(first_set.get(&nonterm!("C")).unwrap(), expected.as_slice())); + } +} diff --git a/crates/parse_common/src/rule/follow.rs b/crates/parse_common/src/rule/follow.rs new file mode 100644 index 0000000..f637c31 --- /dev/null +++ b/crates/parse_common/src/rule/follow.rs @@ -0,0 +1,199 @@ +use std::collections::{HashMap, HashSet}; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleElem, RuleSet, RuleTag}; + +use crate::rule::FirstSet; + +pub struct FollowSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + map: HashMap>>, + _ruleset: &'a RuleSet, +} + +impl<'a, T, R> From<&'a RuleSet> for FollowSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { + let build = FollowSetBuilder::from(ruleset).expand(); + let map = build.map + .into_iter() + .map(|(k, v)| (k, v.into_iter().collect())) + .collect(); + + FollowSet { + map, + _ruleset: ruleset, + } + } +} + +impl<'a, T, R> FollowSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn get(&self, nonterm: &str) -> Option<&[&'a RuleElem]> { + self.map.get(nonterm).map(|terms| terms.as_slice()) + } +} + +pub struct FollowSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + map: HashMap>>, + ruleset: &'a RuleSet, +} + +impl<'a, T, R> From<&'a RuleSet> for FollowSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { + let mut map = HashMap::new(); + for nonterm in ruleset.nonterms() { + if let RuleElem::NonTerm(nonterm) = nonterm { + map.insert(nonterm.clone(), HashSet::new()); + } + } + map.get_mut(&ruleset.top).unwrap().insert(&RuleElem::EOF); + + FollowSetBuilder { + map, + ruleset, + } + } +} + +impl<'a, T, R> FollowSetBuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn expand(mut self) -> Self { + while self.expand_child() {} + self + } + + fn expand_child(&mut self) -> bool { + let first_set = FirstSet::from(self.ruleset); + + let mut modified = false; + for rule in &self.ruleset.rules { + let lhs = match &rule.lhs { + RuleElem::NonTerm(s) => s.as_str(), + _ => unreachable!(), + }; + for rhs_idx in 0..rule.rhs.len() { + let target = &rule.rhs[rhs_idx]; + let follow_symbols = &rule.rhs[rhs_idx+1..]; + let prob_first_symbols = first_set.get_by(follow_symbols); + modified |= self.append_by_first(target, &prob_first_symbols); + if prob_first_symbols.contains(&&RuleElem::Epsilon) { + modified |= self.append_when_nullable(target, lhs); + } + } + } + modified + } + + fn append_by_first(&mut self, target: &RuleElem, first_symbol: &[&'a RuleElem]) -> bool { + if let RuleElem::NonTerm(nonterm) = target { + let old_idx = self.map.get(nonterm).unwrap().len(); + let first_symbol = first_symbol.iter().filter(|relem| **relem != &RuleElem::Epsilon); + self.map.get_mut(nonterm).unwrap().extend(first_symbol); + old_idx != self.map.get(nonterm).unwrap().len() + } else { + false + } + } + + fn append_when_nullable(&mut self, target: &RuleElem, lhs: &str) -> bool { + 
if let RuleElem::NonTerm(nonterm) = target { + let lhs_follow = self.map.get(lhs).unwrap().clone(); + let old_idx = self.map.get(nonterm).unwrap().len(); + self.map.get_mut(nonterm).unwrap().extend(lhs_follow); + old_idx != self.map.get(nonterm).unwrap().len() + } else { + false + } + } +} + +#[cfg(test)] +mod test { + use copager_cfg::token::TokenTag; + use copager_cfg::rule::{Rule, RuleTag, RuleElem}; + use copager_lex::LexSource; + use copager_parse::ParseSource; + + use super::FollowSet; + + #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)] + enum TestToken { + #[token(r"a")] + A, + #[token(r"b")] + B, + } + + #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] + enum TestRule { + #[default] + #[rule(" ::= ")] + S, + #[rule(" ::= A")] + A, + #[rule(" ::= B")] + B, + #[rule(" ::= ")] + C, + } + + fn eq_symbols(lhs: &[&RuleElem], rhs: &[RuleElem]) -> bool + where + T: TokenTag, + { + if lhs.len() != rhs.len() { + println!("lhs: {:?}, rhs: {:?}", lhs, rhs); + return false; + } + for lelem in lhs { + if !rhs.contains(lelem) { + println!("lhs: {:?}, rhs: {:?}", lhs, rhs); + return false; + } + } + return true; + } + + #[test] + fn follow_set() { + macro_rules! term { + ($expr:ident) => { RuleElem::new_term(TestToken::$expr) }; + } + + let ruleset = TestRule::default().into_ruleset(); + let follow_set = FollowSet::from(&ruleset); + + let expected = vec![term!(B), RuleElem::EOF]; + assert!(eq_symbols(follow_set.get("S").unwrap(), expected.as_slice())); + + let expected = vec![term!(A)]; + assert!(eq_symbols(follow_set.get("A").unwrap(), expected.as_slice())); + + let expected = vec![term!(B), RuleElem::EOF]; + assert!(eq_symbols(follow_set.get("B").unwrap(), expected.as_slice())); + + let expected = vec![]; + assert!(eq_symbols(follow_set.get("C").unwrap(), expected.as_slice())); + } +} diff --git a/crates/parse_derive/Cargo.toml b/crates/parse_derive/Cargo.toml index dc2fd69..1046351 100644 --- a/crates/parse_derive/Cargo.toml +++ b/crates/parse_derive/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "copager_parse_derive" -version = "0.2.0" +version = "0.3.0" edition = "2021" [dependencies] diff --git a/crates/parse_derive/src/impl/rule.rs b/crates/parse_derive/src/impl/rule.rs index 5031024..6855b79 100644 --- a/crates/parse_derive/src/impl/rule.rs +++ b/crates/parse_derive/src/impl/rule.rs @@ -29,7 +29,7 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { quote! { impl RuleTag<#enum_assoc_type> for #enum_name { - fn as_rules(&self) -> Vec> { + fn as_rules(&self) -> Vec> { match self { #( #enum_matcher_table_i2r, )* } @@ -49,7 +49,7 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { struct VariantInfo<'a> { parent_ident: &'a Ident, self_ident: &'a Ident, - rules: Vec, + rule_lhs_rhs_tuples: Vec, } impl<'a> VariantInfo<'a> { @@ -60,18 +60,18 @@ impl<'a> VariantInfo<'a> { .parse::() .unwrap(); - let mut rules = vec![]; + let mut rule_lhs_rhs_tuples = vec![]; for attr in &variant.attrs { if attr.path().is_ident("rule") { let attr = attr.parse_args::().unwrap().value(); - rules.push(parse_rule(&token_ident, &attr)); + rule_lhs_rhs_tuples.push(parse_rule(&token_ident, &attr)); } } VariantInfo { parent_ident, self_ident, - rules, + rule_lhs_rhs_tuples, } } @@ -84,11 +84,11 @@ impl<'a> VariantInfo<'a> { fn gen_matcher_ident_to_rule(&self) -> TokenStream { let ident = self.gen_ident(); - if self.rules.is_empty() { + if self.rule_lhs_rhs_tuples.is_empty() { quote! { #ident => unimplemented!() } } else { - let rules = &self.rules; - quote! 
{ #ident => vec![#(#rules),*] } + let lhs_rhs_tuple = &self.rule_lhs_rhs_tuples; + quote! { #ident => vec![#(Rule::new(Some(#ident), #lhs_rhs_tuple)),*] } } } } @@ -112,6 +112,11 @@ fn parse_rule(token: &TokenStream, input: &str) -> TokenStream { } }) .collect::>(); + let rhs = if rhs.len() == 0 { + vec![quote! { RuleElem::Epsilon }] + } else { + rhs + }; - quote! { Rule::from((#lhs, vec![ #( #rhs, )* ])) } + quote! { #lhs, vec![ #( #rhs, )* ], } } diff --git a/crates/parse_lr1/src/builder.rs b/crates/parse_lr1/src/builder.rs deleted file mode 100644 index 5978ed8..0000000 --- a/crates/parse_lr1/src/builder.rs +++ /dev/null @@ -1,394 +0,0 @@ -use std::collections::{HashMap, HashSet}; -use std::hash::Hash; - -use itertools::Itertools; -use serde::{Serialize, Deserialize}; - -use copager_cfg::token::TokenTag; -use copager_cfg::rule::{Rule, RuleElem, RuleSet}; -use copager_lex::LexSource; -use copager_parse::ParseSource; - -#[derive(Debug, Serialize, Deserialize)] -pub enum LRAction { - Shift(usize), - Reduce(R, usize, usize), // tag, goto_id, elems_cnt - Accept, - None, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct LR1Configure -where - Sl: LexSource, - Sp: ParseSource, -{ - #[serde(bound( - serialize = "Sl::Tag: Serialize, Sp::Tag: Serialize", - deserialize = "Sl::Tag: Deserialize<'de>, Sp::Tag: Deserialize<'de>", - ))] - pub action_table: Vec>>, - pub eof_action_table: Vec>, - pub goto_table: Vec>, -} - -impl LR1Configure -where - Sl: LexSource, - Sp: ParseSource, -{ - pub fn new(source_l: &Sl, source_p: &Sp) -> anyhow::Result { - // 1. Pre-process - let ruleset = source_p.into_ruleset(); - let first_set = ruleset.first_set(); - - // 2. Generate dummy nonterm - let top_dummy: Rule = Rule::from(( - RuleElem::new_nonterm("__top_dummy"), - vec![RuleElem::new_nonterm(&ruleset.top)], - )); - let top_dummy = vec![LRItem::new( - &top_dummy, - HashSet::from_iter(vec![&RuleElem::EOF]), - )]; - let lr_items = LRItemSet::new(0, HashSet::from_iter(top_dummy)); - let lr_items = lr_items.expand_closure(&ruleset, &first_set); - - // 3. Generate a DFA - let dfa = LRItemDFA::r#gen(lr_items, &ruleset, &first_set); - - // 4. Initialize tables - let mut idx = 0; - let mut nonterm_table = HashMap::new(); - for relem in ruleset.nonterms() { - if let RuleElem::NonTerm(s) = &relem { - if !nonterm_table.contains_key(s) { - nonterm_table.insert(s.to_string(), idx); - idx += 1; - } - } - } - - let mut action_table: Vec>> = Vec::with_capacity(dfa.0.len()); - let mut eof_action_table: Vec> = Vec::with_capacity(dfa.0.len()); - let mut goto_table: Vec> = Vec::with_capacity(dfa.0.len()); - for _ in 0..dfa.0.len() { - action_table.push(HashMap::from_iter( - source_l.iter() - .map(|token| (token, LRAction::None)) - .collect::)>>(), - )); - eof_action_table.push(LRAction::None); - goto_table.push(vec![0; nonterm_table.keys().len()]); - } - - // 5. 
Setup tables - let rule_tags = source_p.iter().collect::>(); - for lritem_set in &dfa.0 { - for (token, next) in &lritem_set.next { - match &token { - RuleElem::NonTerm(s) => { - let id = lritem_set.id as usize; - let label = *nonterm_table.get(s).unwrap(); - goto_table[id][label] = *next as usize; - } - RuleElem::Term(t) => { - let id = lritem_set.id as usize; - let label = action_table[id].get_mut(t).unwrap(); - *label = LRAction::Shift(*next as usize); - } - _ => {} - } - } - - for item in &lritem_set.lr_items { - if item.dot_pos != item.rule.rhs.len() { - continue; - } - if let RuleElem::NonTerm(lhs) = &item.rule.lhs { - for la_token in &item.la_tokens { - if let RuleElem::Term(t) = la_token { - let id = lritem_set.id as usize; - let label = action_table[id].get_mut(t).unwrap(); - *label = LRAction::Reduce( - rule_tags[item.rule.id as usize], - *nonterm_table.get(lhs).unwrap(), - item.rule.rhs.len(), - ); - } - if let RuleElem::EOF = la_token { - let id = lritem_set.id as usize; - eof_action_table[id] = if lhs == "__top_dummy" { - LRAction::Accept - } else { - LRAction::Reduce( - rule_tags[item.rule.id as usize], - *nonterm_table.get(lhs).unwrap(), - item.rule.rhs.len(), - ) - }; - } - } - } - } - } - - Ok(LR1Configure { - action_table, - eof_action_table, - goto_table, - }) - } -} - -#[derive(Debug)] -struct LRItemDFA<'a, T: TokenTag> ( - Vec> -); - -impl<'a, T: TokenTag> LRItemDFA<'a, T> { - fn r#gen( - init_set: LRItemSet<'a, T>, - ruleset: &'a RuleSet, - first_set: &HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> LRItemDFA<'a, T> { - let issue_id = |old_sets: &Vec>, set: &LRItemSet<'a, T>| { - if let Some(ex_set) = old_sets.iter().find(|&set0| set0.strict_eq(set)) { - Err(ex_set.id) - } else { - Ok(old_sets.len() as i32) - } - }; - - // "Expand a closure" <--> "Generate next nodes" loop - let mut loop_idx = (0, 1); - let mut lritem_sets = vec![init_set]; - while loop_idx.0 != loop_idx.1 { - let mut new_found_cnt = 0; - for idx in loop_idx.0..loop_idx.1 { - let next_sets = lritem_sets[idx].gen_next_sets(ruleset, first_set); - for (bef_token, mut next_set) in next_sets { - match issue_id(&lritem_sets, &next_set) { - Ok(id) => { - next_set.id = id; - lritem_sets[idx].next.insert(bef_token, id); - lritem_sets.push(next_set); - new_found_cnt += 1; - } - Err(id) => { - lritem_sets[idx].next.insert(bef_token, id); - } - } - } - } - loop_idx = (loop_idx.1, loop_idx.1 + new_found_cnt); - } - - LRItemDFA(lritem_sets) - } -} - -#[derive(Clone, Debug, Eq)] -struct LRItemSet<'a, T: TokenTag> { - id: i32, - next: HashMap<&'a RuleElem, i32>, - lr_items: HashSet>, -} - -impl<'a, T: TokenTag> PartialEq for LRItemSet<'a, T> { - fn eq(&self, other: &LRItemSet<'a, T>) -> bool { - self.lr_items == other.lr_items - } -} - -impl<'a, T: TokenTag> PartialEq>> for LRItemSet<'a, T> { - fn eq(&self, other: &HashSet>) -> bool { - &self.lr_items == other - } -} - -impl<'a, T: TokenTag> LRItemSet<'a, T> { - fn new(id: i32, lr_items: HashSet>) -> Self { - LRItemSet { - id, - next: HashMap::new(), - lr_items, - } - } - - fn strict_eq(&self, other: &Self) -> bool { - if self.lr_items.len() != other.lr_items.len() { - return false; - } - self.lr_items - .iter() - .all(|item| other.lr_items.iter().any(|item_b| item_b.strict_eq(item))) - } - - fn expand_closure<'b>( - mut self, - ruleset: &'a RuleSet, - first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> LRItemSet<'a, T> { - let mut lr_items = self.lr_items.clone(); - let mut lr_items_fetched = self.lr_items; - loop { - let new_items: Vec> = 
lr_items_fetched - .iter() - .flat_map(|item| item.expand_closure(ruleset, first_set)) - .collect(); - let new_items = LRItem::<'_, _>::unify_all(new_items); - let new_items = HashSet::from_iter(new_items); - - let bef_len = lr_items.len(); - lr_items = LRItem::<'_, _>::unity_set(lr_items, new_items.clone()); - let af_len = lr_items.len(); - if bef_len == af_len { - break; - } - lr_items_fetched = new_items; - } - self.lr_items = lr_items; - - self - } - - fn gen_next_sets<'b>( - &self, - ruleset: &'a RuleSet, - first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> HashMap<&'a RuleElem, LRItemSet<'a, T>> { - let new_items: Vec<(&'a RuleElem, LRItem<'a, T>)> = self - .lr_items - .iter() - .filter_map(|lr_item| lr_item.next_dot()) - .collect(); - - let mut new_sets: HashMap<&RuleElem, HashSet>> = HashMap::new(); - for (bef_token, lr_item) in new_items { - if new_sets.get(&bef_token).is_none() { - new_sets.insert(bef_token, HashSet::new()); - } - new_sets.get_mut(&bef_token).unwrap().insert(lr_item); - } - - let mut new_sets_expanded: HashMap<&'a RuleElem, LRItemSet<'_, _>> = HashMap::new(); - for (ktoken, new_set) in new_sets { - let new_set = LRItemSet::new(0, new_set); - let new_set = new_set.expand_closure(ruleset, first_set); - new_sets_expanded.insert(ktoken, new_set); - } - - new_sets_expanded - } -} - -#[derive(Clone, Debug, Eq)] -struct LRItem<'a, T: TokenTag> { - rule: &'a Rule, - dot_pos: usize, - la_tokens: HashSet<&'a RuleElem>, -} - -impl<'a, T: TokenTag> Hash for LRItem<'a, T> { - fn hash(&self, state: &mut H) { - self.rule.hash(state); - self.dot_pos.hash(state); - } -} - -impl<'a, T: TokenTag> PartialEq for LRItem<'a, T> { - fn eq(&self, other: &Self) -> bool { - self.rule == other.rule && self.dot_pos == other.dot_pos - } -} - -impl<'a, T: TokenTag> LRItem<'a, T> { - fn new(rule: &'a Rule, la_tokens: HashSet<&'a RuleElem>) -> LRItem<'a, T> { - LRItem { - rule, - dot_pos: 0, - la_tokens, - } - } - - fn strict_eq(&self, other: &Self) -> bool { - self.rule == other.rule - && self.dot_pos == other.dot_pos - && self.la_tokens == other.la_tokens - } - - fn expand_closure<'b>( - &self, - ruleset: &'a RuleSet, - first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, - ) -> HashSet> { - let af_la_tokens = if self.dot_pos + 1 < self.rule.rhs.len() { - HashSet::from_iter( - first_set - .get(&self.rule.rhs[self.dot_pos + 1]) - .unwrap() - .clone(), - ) - } else { - self.la_tokens.clone() - }; - - if self.dot_pos < self.rule.rhs.len() - && matches!(self.rule.rhs[self.dot_pos], RuleElem::NonTerm(_)) - { - ruleset - .find_rule(&self.rule.rhs[self.dot_pos]) - .into_iter() - .map(|rule| LRItem::<'_, _>::new(rule, af_la_tokens.clone())) - .collect() - } else { - HashSet::new() - } - } - - #[allow(clippy::int_plus_one)] - fn next_dot(&self) -> Option<(&'a RuleElem, LRItem<'a, T>)> { - if self.dot_pos + 1 <= self.rule.rhs.len() { - let bef_token = &self.rule.rhs[self.dot_pos]; - let item = LRItem { - rule: self.rule, - dot_pos: self.dot_pos + 1, - la_tokens: self.la_tokens.clone(), - }; - Some((bef_token, item)) - } else { - None - } - } - - fn unify(&mut self, other: LRItem<'a, T>) { - if self != &other { - return; - } - other.la_tokens.into_iter().for_each(|la_token| { - if !self.la_tokens.contains(&la_token) { - self.la_tokens.insert(la_token); - } - }); - } - - fn unify_all(mut items: Vec>) -> Vec> { - for idx in (0..items.len()).permutations(2) { - let (a_idx, b_idx) = (idx[0], idx[1]); - let tmp = items[b_idx].clone(); - items[a_idx].unify(tmp); - } - items - } - - fn 
unity_set( - items_a: HashSet>, - items_b: HashSet>, - ) -> HashSet> { - let mut items_a = Vec::from_iter(items_a); - let items_b = Vec::from_iter(items_b); - items_a.extend(items_b); - HashSet::from_iter(Self::unify_all(items_a)) - } -} diff --git a/crates/parse_lr1/src/error.rs b/crates/parse_lr1/src/error.rs deleted file mode 100644 index 4cbb467..0000000 --- a/crates/parse_lr1/src/error.rs +++ /dev/null @@ -1,23 +0,0 @@ -use thiserror::Error; - -use copager_core::error::ParseError as SuperParseError; -use copager_cfg::token::{TokenTag, Token}; - -#[derive(Debug, Error)] -pub enum ParseError { - #[error("Unexpected token {actual:?} found")] - UnexpectedToken { - actual: String, - }, - #[error("Unexpected EOF")] - UnexpectedEOF, -} - -impl ParseError { - pub fn new_unexpected_token(expected: Token) -> SuperParseError { - let err = ParseError::UnexpectedToken { - actual: format!("{:?}", expected.kind), - }; - SuperParseError::from(err).with(expected) - } -} diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs deleted file mode 100644 index 0e1a754..0000000 --- a/crates/parse_lr1/src/lib.rs +++ /dev/null @@ -1,98 +0,0 @@ -#![feature(gen_blocks)] - -mod error; -mod builder; - -use std::collections::HashMap; - -use serde::{Serialize, Deserialize}; - -use copager_cfg::token::Token; -use copager_lex::LexSource; -use copager_parse::{ParseSource, ParseDriver, ParseEvent}; -use copager_utils::cache::Cacheable; - -use builder::{LR1Configure, LRAction}; -use error::ParseError; - -#[derive(Debug)] -pub struct LR1 -where - Sl: LexSource, - Sp: ParseSource, -{ - tables: LR1Configure, -} - -impl Cacheable<(Sl, Sp)> for LR1 -where - Sl: LexSource, - Sl::Tag: Serialize + for<'de> Deserialize<'de>, - Sp: ParseSource, - Sp::Tag: Serialize + for<'de> Deserialize<'de>, -{ - type Cache = LR1Configure; - - fn new((source_l, source_p): (Sl, Sp)) -> anyhow::Result { - Ok(LR1Configure::new(&source_l, &source_p)?) 
- } - - fn restore(tables: Self::Cache) -> Self { - LR1 { tables } - } -} - -impl ParseDriver for LR1 -where - Sl: LexSource, - Sp: ParseSource, -{ - fn try_from((source_l, source_p): (Sl, Sp)) -> anyhow::Result { - let tables = LR1Configure::new(&source_l, &source_p)?; - Ok(LR1 { tables }) - } - - gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> - where - Il: Iterator>, - { - let mut stack = vec![0]; - loop { - let token = lexer.next(); - loop { - let top = stack[stack.len() - 1]; - let action = match token { - Some(token) => { - let local_action_table: &HashMap<_, _> = &self.tables.action_table[top]; - (local_action_table.get(&token.kind).unwrap(), Some(token)) - }, - None => (&self.tables.eof_action_table[top], None), - }; - match action { - (LRAction::Shift(new_state), Some(token)) => { - stack.push(*new_state); - yield ParseEvent::Read(token); - break; - } - (LRAction::Reduce(tag, goto, elems_cnt), _) => { - stack.truncate(stack.len() - elems_cnt); - stack.push(self.tables.goto_table[stack[stack.len() - 1]][*goto]); - yield ParseEvent::Parse { rule: *tag, len: *elems_cnt }; - } - (LRAction::Accept, _) => { - return; - } - (LRAction::None, Some(token)) => { - yield ParseEvent::Err(ParseError::new_unexpected_token(token).into()); - return; - } - (LRAction::None, None) => { - yield ParseEvent::Err(ParseError::UnexpectedEOF.into()); - return; - } - _ => unreachable!(), - } - } - } - } -} diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs deleted file mode 100644 index 1acd706..0000000 --- a/crates/parse_lr1/tests/simple.rs +++ /dev/null @@ -1,105 +0,0 @@ -use serde::{Serialize, Deserialize}; - -use copager_cfg::token::TokenTag; -use copager_cfg::rule::{RuleTag, Rule, RuleElem}; -use copager_lex::{LexSource, LexDriver}; -use copager_lex_regex::RegexLexer; -use copager_parse::{ParseSource, ParseDriver, ParseEvent}; -use copager_parse_lr1::LR1; - -#[derive( - Debug, Default, Copy, Clone, Hash, PartialEq, Eq, - LexSource, Serialize, Deserialize -)] -enum ExprToken { - #[default] - #[token(text = r"\+")] - Plus, - #[token(text = r"-")] - Minus, - #[token(text = r"\*")] - Mul, - #[token(text = r"/")] - Div, - #[token(text = r"\(")] - BracketL, - #[token(text = r"\)")] - BracketR, - #[token(text = r"[1-9][0-9]*")] - Num, - #[token(text = r"[ \t\n]+", ignored)] - _Whitespace, -} - -#[derive( - Debug, Default, Copy, Clone, Hash, PartialEq, Eq, - ParseSource, Serialize, Deserialize -)] -enum ExprRule { - #[default] - #[rule(" ::= Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, -} - -type MyLexer = RegexLexer; -type MyParser = LR1; - -const OK_INPUTS: [&str; 10] = [ - "10", - "10 + 20", - "10 - 20", - "10 * 20", - "10 / 20", - "10 + 20 * 30 - 40", - "(10)", - "((((10))))", - "10 * (20 - 30)", - "((10 + 20) * (30 / 40)) - 50", -]; - -const ERR_INPUTS: [&str; 7] = [ - "()", - "(10 -", - "10 +", - "*", - "10 20 + 30", - "10 + 20 * 30 / 40 (", - "(((10))", -]; - -#[test] -fn simple_success() { - for input in &OK_INPUTS { - assert!(parse(input), "{}", input); - } -} - -#[test] -fn simple_failure() { - for input in &ERR_INPUTS { - assert!(!parse(input), "{}", input); - } -} - -fn parse<'input>(input: &'input str) -> bool { - let source = ExprToken::default(); - let lexer = >::try_from(source).unwrap(); - - let source = (ExprToken::default(), ExprRule::default()); - let parser = 
>::try_from(source).unwrap(); - - let mut parse_itr = parser.run(lexer.run(input)); - let is_err = |state| matches!(state, ParseEvent::Err(_)); - let err_happened = parse_itr.any(is_err); - - !err_happened -} diff --git a/crates/parse_lr_common/Cargo.toml b/crates/parse_lr_common/Cargo.toml new file mode 100644 index 0000000..372c90e --- /dev/null +++ b/crates/parse_lr_common/Cargo.toml @@ -0,0 +1,15 @@ +cargo-features = ["edition2024"] + +[package] +name = "copager_parse_lr_common" +version = "0.3.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +serde = { workspace = true, features = ["derive"] } +copager_core = { path = "../core" } +copager_cfg = { path = "../cfg" } +copager_parse = { path = "../parse" } +copager_parse_common = { path = "../parse_common" } diff --git a/crates/parse_lr_common/src/automaton.rs b/crates/parse_lr_common/src/automaton.rs new file mode 100644 index 0000000..fc5a8b7 --- /dev/null +++ b/crates/parse_lr_common/src/automaton.rs @@ -0,0 +1,11 @@ +use copager_cfg::token::TokenTag; +use copager_cfg::rule::RuleElem; + +pub mod lr0; +pub mod lr1; +pub mod lalr1; + +pub trait Automaton<'a: 'b, 'b, T: TokenTag + 'a> { + fn len(&self) -> usize; + fn edges(&'b self) -> impl Iterator)>; +} diff --git a/crates/parse_lr_common/src/automaton/lalr1.rs b/crates/parse_lr_common/src/automaton/lalr1.rs new file mode 100644 index 0000000..4c7b4cd --- /dev/null +++ b/crates/parse_lr_common/src/automaton/lalr1.rs @@ -0,0 +1,4 @@ +pub mod dfa; +pub mod item; + +pub use dfa::LALR1DFA; diff --git a/crates/parse_lr_common/src/automaton/lalr1/dfa.rs b/crates/parse_lr_common/src/automaton/lalr1/dfa.rs new file mode 100644 index 0000000..421c68e --- /dev/null +++ b/crates/parse_lr_common/src/automaton/lalr1/dfa.rs @@ -0,0 +1,197 @@ +use std::collections::{HashMap, HashSet}; +use std::rc::Rc; +use std::sync::RwLock; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleTag}; + +use crate::automaton::lr1::dfa::{LR1DFA, LR1DFANode}; +use crate::lalr1::item::{LALR1Item, LALR1ItemSet}; +use crate::automaton::Automaton; + +#[derive(Debug)] +pub struct LALR1DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub id: usize, + pub itemset: LALR1ItemSet<'a, T, R>, +} + +impl<'a, T, R> LALR1DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from_lr1_nodes<'b>(id: usize, lr1_dfa_nodes: Vec>>>) -> Self { + let mut rule_la_tokens_map = HashMap::new(); + for lr1_dfa_node in &lr1_dfa_nodes { + for rule in &lr1_dfa_node.read().unwrap().itemset.items { + rule_la_tokens_map + .entry((rule.rule, rule.dot_pos)) + .or_insert_with(HashSet::new) + .insert(rule.la_token); + } + } + + let grouped_items = rule_la_tokens_map + .into_iter() + .map(|((rule, dot_pos), la_tokens)| { + LALR1Item::new(rule, dot_pos, la_tokens.into_iter().collect()) + }) + .collect(); + let itemset = LALR1ItemSet::new(grouped_items); + + LALR1DFANode { id, itemset } + } +} + +impl<'a, T, R> LALR1DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn find_all(&self, rule: &Rule) -> impl Iterator, &[&'a RuleElem])> { + self.find_all_by(move |item| item.rule == rule) + } + + pub fn find_all_by(&self, cond: F) -> impl Iterator, &[&'a RuleElem])> + where + F: Fn(&&LALR1Item<'a, T, R>) -> bool + { + self.itemset + .items + .iter() + .filter(cond) + .map(|item| (item.rule, item.la_tokens.as_slice())) + } +} + +#[derive(Debug)] +pub struct LALR1DFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub nodes: Vec>, + pub edges: Vec<(usize, 
usize, &'a RuleElem)>, +} + +impl<'a, 'b, T, R> From> for LALR1DFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(lr1_dfa: LR1DFA<'a, 'b, T, R>) -> Self { + let lalr1_dfa_keys = lr1_dfa.nodes + .into_iter() + .map(LALR1DFAKey::from) + .collect::>(); + + let mut managed_keys = vec![]; + let mut managed_lr1_nodes: Vec> = vec![]; + for lalr1_dfa_key in lalr1_dfa_keys { + let managed_idx = managed_keys.iter().position(|key| key == &lalr1_dfa_key); + if let Some(managed_idx) = managed_idx { + let lr1_node = Rc::clone(&lalr1_dfa_key.0); + managed_lr1_nodes[managed_idx].push(lr1_node); + } else { + managed_lr1_nodes.push(vec![Rc::clone(&lalr1_dfa_key.0)]); + managed_keys.push(lalr1_dfa_key); + } + } + + let mut lalr1_cand_node_sets = managed_keys + .into_iter() + .zip(managed_lr1_nodes.into_iter()) + .collect::>(); + lalr1_cand_node_sets.sort_by_cached_key(|(key, _)| key.0.read().unwrap().id); + + let mut id_map = HashMap::new(); + let mut lalr1_nodes = vec![]; + for (new_id, (_, lalr1_cand_node_set)) in lalr1_cand_node_sets.into_iter().enumerate() { + for lalr1_cand_node in &lalr1_cand_node_set { + let old_id = lalr1_cand_node.read().unwrap().id; + id_map.insert(old_id, new_id); + } + lalr1_nodes.push(LALR1DFANode::from_lr1_nodes(new_id, lalr1_cand_node_set)); + } + + let lalr1_edges = lr1_dfa.edges + .into_iter() + .map(|(from, to, cond)| { + let from = id_map.get(&from).unwrap(); + let to = id_map.get(&to).unwrap(); + (*from, *to, cond) + }) + .collect(); + + LALR1DFA { + nodes: lalr1_nodes, + edges: lalr1_edges, + } + } +} + +impl<'a: 'b, 'b, T, R> Automaton<'a, 'b, T> for LALR1DFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn len(&self) -> usize { + self.nodes.len() + } + + fn edges(&'b self) -> impl Iterator)> { + self.edges.iter() + } +} + +#[derive(Debug)] +struct LALR1DFAKey<'a, 'b, T, R> (Rc>>) +where + T: TokenTag, + R: RuleTag; + +impl<'a, 'b, T, R> PartialEq for LALR1DFAKey<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn eq(&self, other: &Self) -> bool { + let self_node = self.0.read().unwrap(); + let other_node = other.0.read().unwrap(); + + if self_node.itemset.items.len() != other_node.itemset.items.len() { + return false; + } + + 'outer: for item in &self_node.itemset.items { + for other_item in &other_node.itemset.items { + if item.rule == other_item.rule && item.dot_pos == other_item.dot_pos { + continue 'outer; + } + } + return false; + } + + true + } +} + +impl<'a, 'b, T, R> Eq for LALR1DFAKey<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{} + +impl<'a, 'b, T, R> From>>> for LALR1DFAKey<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(lr1_dfa_node: Rc>>) -> Self { + Self (lr1_dfa_node) + } +} diff --git a/crates/parse_lr_common/src/automaton/lalr1/item.rs b/crates/parse_lr_common/src/automaton/lalr1/item.rs new file mode 100644 index 0000000..023fb1d --- /dev/null +++ b/crates/parse_lr_common/src/automaton/lalr1/item.rs @@ -0,0 +1,83 @@ +use std::fmt::{Display, Debug}; +use std::hash::Hash; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleTag}; + +#[derive(Clone, Hash, PartialEq, Eq)] +pub struct LALR1Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub rule: &'a Rule, + pub dot_pos: usize, + pub la_tokens: Vec<&'a RuleElem>, +} + +impl<'a, T, R> Display for LALR1Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} -> ", self.rule.lhs)?; + for (i, elem) in self.rule.rhs.iter().enumerate() { + if i == 
self.dot_pos { + write!(f, "• ")?; + } + write!(f, "{} ", elem)?; + } + if self.dot_pos == self.rule.rhs.len() { + write!(f, "•")?; + } + write!(f, "[{:?}]", self.la_tokens) + } +} + +impl<'a, T, R> Debug for LALR1Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + +impl<'a, T, R> LALR1Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn new(rule: &'a Rule, dot_pos: usize, la_tokens: Vec<&'a RuleElem>) -> Self { + LALR1Item { rule, dot_pos, la_tokens } + } + + pub fn check_next_elem(&self) -> Option<&'a RuleElem> { + if self.dot_pos < self.rule.rhs.len() { + Some(&self.rule.rhs[self.dot_pos]) + } else { + None + } + } +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct LALR1ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub items: Vec>, +} + +impl <'a, T, R> LALR1ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn new(items: Vec>) -> Self { + LALR1ItemSet { items } + } +} diff --git a/crates/parse_lr_common/src/automaton/lr0.rs b/crates/parse_lr_common/src/automaton/lr0.rs new file mode 100644 index 0000000..dbf522f --- /dev/null +++ b/crates/parse_lr_common/src/automaton/lr0.rs @@ -0,0 +1,4 @@ +pub mod item; +pub mod dfa; + +pub use dfa::LR0DFA; diff --git a/crates/parse_lr_common/src/automaton/lr0/dfa.rs b/crates/parse_lr_common/src/automaton/lr0/dfa.rs new file mode 100644 index 0000000..1c0b945 --- /dev/null +++ b/crates/parse_lr_common/src/automaton/lr0/dfa.rs @@ -0,0 +1,224 @@ +use std::collections::{HashMap, BTreeMap}; +use std::fmt::Debug; +use std::hash::Hash; +use std::rc::Rc; +use std::sync::RwLock; +use std::marker::PhantomData; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; + +use crate::automaton::Automaton; +use crate::lr0::item::{LR0Item, LR0ItemSet}; + +#[derive(Clone)] +pub struct LR0DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub id: usize, + pub itemset: LR0ItemSet<'a, T, R>, + pub next: Vec<(&'a RuleElem, Rc>)>, // (cond, next_node) +} + +impl<'a, T, R> Debug for LR0DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + #[derive(Debug)] + #[allow(dead_code)] + struct LR0DFANode<'a, 'b, T, R> + where + T: TokenTag, + R: RuleTag, + { + id: usize, + itemset: &'b LR0ItemSet<'a, T, R>, + next: Vec<(&'a RuleElem, usize)>, + } + + let id = self.id; + let itemset = &self.itemset; + let next = self.next + .iter() + .map(|(cond, next_node)| (*cond, next_node.read().unwrap().id)) + .collect::>(); + + if f.alternate() { + return write!(f, "{:#?}", LR0DFANode { id, itemset, next }); + } else { + write!(f, "{:?}", LR0DFANode { id, itemset, next }) + } + } +} + +impl<'a, T, R> Hash for LR0DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn hash(&self, state: &mut H) { + self.id.hash(state); + self.itemset.hash(state); + } +} + +impl<'a, T, R> PartialEq for LR0DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn eq(&self, other: &Self) -> bool { + self.id == other.id && self.itemset == other.itemset + } +} + +impl<'a, T, R> Eq for LR0DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{} + +impl<'a, T, R> LR0DFANode<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn find_all(&self, rule: &Rule) -> impl Iterator> { + self.find_all_by(move |item| item.rule == rule) + } + + pub fn find_all_by(&self, cond: F) -> impl Iterator> + where + F: Fn(&&LR0Item<'a, T, R>) -> bool + 
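    // Filters this node's (closure-expanded) items with `cond` and yields each
    // match's underlying rule; the LR(0) table construction below pairs this
    // with `is_lr0_reduce_state` (dot at the end of the RHS) to enumerate a
    // state's reduce candidates.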
{ + self.itemset + .items + .iter() + .filter(cond) + .map(|item| item.rule) + } +} + +#[derive(Debug)] +pub struct LR0DFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub nodes: Vec>>>, + pub edges: Vec<(usize, usize, &'a RuleElem)>, +} + +impl<'a, T, R> From<&'a RuleSet> for LR0DFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { + let dfa_top = LR0DFABuilder::new().start(ruleset); + + let mut nodes = BTreeMap::new(); + let mut edges = vec![]; + let mut stack = vec![dfa_top]; + while let Some(node) = stack.pop() { + let from = node.read().unwrap().id; + if nodes.contains_key(&from) { + continue; + } + for (cond, next_node) in &node.read().unwrap().next { + let to = next_node.read().unwrap().id; + edges.push((from, to, *cond)); + stack.push(Rc::clone(next_node)); + } + nodes.insert(from, Rc::clone(&node)); + } + + let nodes = nodes + .into_iter() + .map(|(_, node)| node) + .collect(); + + LR0DFA { nodes, edges } + } +} + +impl<'a: 'b, 'b, T, R> Automaton<'a, 'b, T> for LR0DFA<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn len(&self) -> usize { + self.nodes.len() + } + + fn edges(&'b self) -> impl Iterator)> { + self.edges.iter() + } +} + +#[derive(Debug)] +struct LR0DFABuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + itemsets: HashMap, Rc>>>, + _phantom_t: PhantomData, + _phantom_r: PhantomData, +} + +impl<'a, T, R> LR0DFABuilder<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn new() -> Self { + LR0DFABuilder { + itemsets: HashMap::new(), + _phantom_t: PhantomData, + _phantom_r: PhantomData, + } + } + + fn start(mut self, ruleset: &'a RuleSet) -> Rc>> { + let top = RuleElem::NonTerm(ruleset.top.clone()); + let top = ruleset.rules + .iter() + .find(|rule| rule.lhs == top) + .unwrap(); + let top = LR0ItemSet::from(ruleset).init(top); + + self.gen_recursive(top) + } + + fn gen_recursive(&mut self, mut itemset: LR0ItemSet<'a, T, R>) -> Rc>> + where + T: TokenTag, + { + if let Some(node) = self.itemsets.get(&itemset) { + return Rc::clone(node); + } + + let id = self.itemsets.len(); + let node = LR0DFANode { id, itemset: itemset.clone(), next: vec![] }; + let node = Rc::new(RwLock::new(node)); + self.itemsets.insert(itemset.clone(), Rc::clone(&node)); + + let mut next = vec![]; + for (cond, nextset) in itemset.gen_next_sets() { + next.push((cond, self.gen_recursive(nextset))); + } + node.write().unwrap().next = next; + + Rc::clone(&node) + } +} + +#[cfg(test)] +mod test { + // TODO +} diff --git a/crates/parse_lr_common/src/automaton/lr0/item.rs b/crates/parse_lr_common/src/automaton/lr0/item.rs new file mode 100644 index 0000000..1cf6449 --- /dev/null +++ b/crates/parse_lr_common/src/automaton/lr0/item.rs @@ -0,0 +1,213 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::{Display, Debug}; +use std::hash::Hash; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; + +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +pub struct LR0Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub rule: &'a Rule, + pub dot_pos: usize, +} + +impl<'a, T, R> Display for LR0Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} -> ", self.rule.lhs)?; + for (i, elem) in self.rule.rhs.iter().enumerate() { + if i == self.dot_pos { + write!(f, "• ")?; + } + write!(f, "{} ", elem)?; + } + if self.dot_pos == self.rule.rhs.len() { + write!(f, "•")?; + } + write!(f, "") + } +} + +impl<'a, T, R> Debug for LR0Item<'a, 
T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + +impl<'a, T, R> From<&'a Rule> for LR0Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(rule: &'a Rule) -> Self { + if rule.rhs[0] == RuleElem::Epsilon { + LR0Item { rule, dot_pos: 1 } + } else { + LR0Item { rule, dot_pos: 0 } + } + } +} + +impl<'a, T, R> LR0Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn gen_next(&self) -> Self { + assert!(self.dot_pos + 1 <= self.rule.rhs.len()); + LR0Item { + rule: self.rule, + dot_pos: self.dot_pos + 1, + } + } + + pub fn check_next_elem(&self) -> Option<&'a RuleElem> { + if self.dot_pos < self.rule.rhs.len() { + Some(&self.rule.rhs[self.dot_pos]) + } else { + None + } + } +} + +#[derive(Clone)] +pub struct LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub items: Vec>, + ruleset: &'a RuleSet, +} + +impl<'a, T, R> Debug for LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if f.alternate() { + write!(f, "{:#?}", self.items) + } else { + write!(f, "{:?}", self.items) + } + } +} + +impl<'a, T, R> From<&'a RuleSet> for LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(ruleset: &'a RuleSet) -> Self { + LR0ItemSet { + items: vec![], + ruleset, + } + } +} + +impl<'a, T, R> Hash for LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn hash(&self, state: &mut H) { + self.items.hash(state); + } +} + +impl<'a, T, R> PartialEq for LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn eq(&self, other: &Self) -> bool { + self.items == other.items + } +} + +impl <'a, T, R> Eq for LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{} + +impl<'a, T, R> LR0ItemSet<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn init(mut self, rule: &'a Rule) -> Self { + self.items = vec![LR0Item::from(rule)]; + self + } + + pub fn gen_next_sets(&mut self) -> impl Iterator, LR0ItemSet<'a, T, R>)> { + self.expand(); + + let mut next_set_candidates = HashMap::new(); + self.items + .iter() + .filter_map(|item| item.check_next_elem().map(|nelem| (nelem, item))) + .for_each(|(nelem, item) | { + next_set_candidates + .entry(nelem) + .or_insert_with(HashSet::new) + .insert(item.gen_next()); + }); + + next_set_candidates + .into_iter() + .map(|(cond, items)| { + let items = items.into_iter().collect(); + (cond, LR0ItemSet { items, ruleset: self.ruleset }) + }) + } + + fn expand(&mut self) { + let mut modified = true; + while modified { + modified = false; + let new_expaned = self.items + .iter() + .flat_map(|item| self.expand_once(item)) + .flatten() + .collect::>(); + for item in new_expaned { + if self.items.contains(&item) { + continue; + } + self.items.push(item); + modified = true; + } + } + } + + fn expand_once(&self, item: &LR0Item<'a, T, R>) -> Option>> { + if let Some(nonterm@RuleElem::NonTerm(..)) = item.check_next_elem() { + Some(self.ruleset + .find_rule(nonterm) + .into_iter() + .map(|rule| LR0Item::from(rule))) + } else { + None + } + } +} + +#[cfg(test)] +mod test { + // TODO +} diff --git a/crates/parse_lr_common/src/automaton/lr1.rs b/crates/parse_lr_common/src/automaton/lr1.rs new file mode 100644 index 0000000..6e822ac --- /dev/null +++ b/crates/parse_lr_common/src/automaton/lr1.rs @@ -0,0 +1,4 @@ +pub mod item; +pub mod dfa; + +pub use dfa::LR1DFA; diff --git a/crates/parse_lr_common/src/automaton/lr1/dfa.rs 
b/crates/parse_lr_common/src/automaton/lr1/dfa.rs new file mode 100644 index 0000000..0696aec --- /dev/null +++ b/crates/parse_lr_common/src/automaton/lr1/dfa.rs @@ -0,0 +1,217 @@ +use std::collections::BTreeMap; +use std::fmt::Debug; +use std::rc::Rc; +use std::sync::RwLock; +use std::marker::PhantomData; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; +use copager_parse_common::rule::FirstSet; + +use crate::automaton::Automaton; +use crate::lr1::item::{LR1Item, LR1ItemSet}; + +#[derive(Clone)] +pub struct LR1DFANode<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub id: usize, + pub itemset: LR1ItemSet<'a, 'b, T, R>, + pub next: Vec<(&'a RuleElem, Rc>)>, // (cond, next_node) +} + +impl<'a, 'b, T, R> Debug for LR1DFANode<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + #[derive(Debug)] + #[allow(dead_code)] + struct LR1DFANode<'a, 'b, 'c, T, R> + where + T: TokenTag, + R: RuleTag, + { + id: usize, + itemset: &'c LR1ItemSet<'a, 'b, T, R>, + next: Vec<(&'a RuleElem, usize)>, + } + + let id = self.id; + let itemset = &self.itemset; + let next = self.next + .iter() + .map(|(cond, next_node)| (*cond, next_node.read().unwrap().id)) + .collect::>(); + + if f.alternate() { + return write!(f, "{:#?}", LR1DFANode { id, itemset, next }); + } else { + write!(f, "{:?}", LR1DFANode { id, itemset, next }) + } + } +} + +impl<'a, 'b, T, R> PartialEq for LR1DFANode<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn eq(&self, other: &Self) -> bool { + self.id == other.id && self.itemset == other.itemset + } +} + +impl<'a, 'b, T, R> Eq for LR1DFANode<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{} + +impl<'a, 'b, T, R> LR1DFANode<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn find_all(&self, rule: &Rule) -> impl Iterator, &'a RuleElem)> { + self.find_all_by(move |item| item.rule == rule) + } + + pub fn find_all_by(&self, cond: F) -> impl Iterator, &'a RuleElem)> + where + F: Fn(&&LR1Item<'a, T, R>) -> bool + { + self.itemset + .items + .iter() + .filter(cond) + .map(|item| (item.rule, item.la_token)) + } +} + +#[derive(Debug)] +pub struct LR1DFA<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub nodes: Vec>>>, + pub edges: Vec<(usize, usize, &'a RuleElem)>, +} + +impl<'a, 'b, T, R> From<(&'a RuleSet, &'b FirstSet<'a, T, R>)> for LR1DFA<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from((ruleset, first_set): (&'a RuleSet, &'b FirstSet<'a, T, R>)) -> Self { + let dfa_top = LR1DFABuilder::new().start(ruleset, &first_set); + + let mut nodes = BTreeMap::new(); + let mut edges = vec![]; + let mut stack = vec![dfa_top]; + while let Some(node) = stack.pop() { + let from = node.read().unwrap().id; + if nodes.contains_key(&from) { + continue; + } + for (cond, next_node) in &node.read().unwrap().next { + let to = next_node.read().unwrap().id; + edges.push((from, to, *cond)); + stack.push(Rc::clone(next_node)); + } + nodes.insert(from, Rc::clone(&node)); + } + + let nodes = nodes + .into_iter() + .map(|(_, node)| node) + .collect(); + + LR1DFA { nodes, edges } + } +} + +impl<'a: 'b, 'b, T, R> Automaton<'a, 'b, T> for LR1DFA<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn len(&self) -> usize { + self.nodes.len() + } + + fn edges(&'b self) -> impl Iterator)> { + self.edges.iter() + } +} + +#[derive(Debug)] +struct LR1DFABuilder<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + managed_itemsets: Vec>, + managed_nodes: Vec>>>, + 
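    // These two vectors form a memo table keyed by item set: `gen_recursive`
    // searches `managed_itemsets` before allocating a new node and reuses the
    // matching entry in `managed_nodes`, so identical LR(1) states are shared
    // and the construction terminates even on recursive grammars.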
_phantom_t: PhantomData, + _phantom_r: PhantomData, +} + +impl<'a, 'b, T, R> LR1DFABuilder<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn new() -> Self { + LR1DFABuilder { + managed_itemsets: vec![], + managed_nodes: vec![], + _phantom_t: PhantomData, + _phantom_r: PhantomData, + } + } + + fn start(mut self, ruleset: &'a RuleSet, first_set: &'b FirstSet<'a, T, R>) -> Rc>> { + let top = RuleElem::NonTerm(ruleset.top.clone()); + let top = ruleset.rules + .iter() + .find(|rule| rule.lhs == top) + .unwrap(); + let top = LR1ItemSet::from((ruleset, first_set)).init(top); + + self.gen_recursive(top) + } + + fn gen_recursive(&mut self, mut itemset: LR1ItemSet<'a, 'b, T, R>) -> Rc>> + where + T: TokenTag, + { + let managed_idx = self.managed_itemsets.iter().position(|set| set == &itemset); + if let Some(managed_idx) = managed_idx { + return Rc::clone(&self.managed_nodes[managed_idx]); + } + + let id = self.managed_itemsets.len(); + let node = LR1DFANode { id, itemset: itemset.clone(), next: vec![] }; + let node = Rc::new(RwLock::new(node)); + self.managed_itemsets.push(itemset.clone()); + self.managed_nodes.push(Rc::clone(&node)); + + let mut next = vec![]; + for (cond, nextset) in itemset.gen_next_sets() { + next.push((cond, self.gen_recursive(nextset))); + } + node.write().unwrap().next = next; + + Rc::clone(&node) + } +} + +#[cfg(test)] +mod test { + // TODO +} diff --git a/crates/parse_lr_common/src/automaton/lr1/item.rs b/crates/parse_lr_common/src/automaton/lr1/item.rs new file mode 100644 index 0000000..4bcf53c --- /dev/null +++ b/crates/parse_lr_common/src/automaton/lr1/item.rs @@ -0,0 +1,216 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::{Display, Debug}; +use std::hash::Hash; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleSet, RuleTag}; +use copager_parse_common::rule::FirstSet; + +#[derive(Clone, Hash, PartialEq, Eq)] +pub struct LR1Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub rule: &'a Rule, + pub dot_pos: usize, + pub la_token: &'a RuleElem, +} + +impl<'a, T, R> Display for LR1Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} -> ", self.rule.lhs)?; + for (i, elem) in self.rule.rhs.iter().enumerate() { + if i == self.dot_pos { + write!(f, "• ")?; + } + write!(f, "{} ", elem)?; + } + if self.dot_pos == self.rule.rhs.len() { + write!(f, "•")?; + } + write!(f, "[{}]", self.la_token) + } +} + +impl<'a, T, R> Debug for LR1Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + +impl<'a, T, R> From<(&'a Rule, &'a RuleElem)> for LR1Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from((rule, la_token): (&'a Rule, &'a RuleElem)) -> Self { + if rule.rhs[0] == RuleElem::Epsilon { + LR1Item { rule, dot_pos: 1, la_token: &RuleElem::EOF } + } else { + LR1Item { rule, dot_pos: 0, la_token } + } + } +} + +impl<'a, T, R> LR1Item<'a, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn gen_next(&self) -> Self { + assert!(self.dot_pos + 1 <= self.rule.rhs.len()); + LR1Item { + rule: self.rule, + dot_pos: self.dot_pos + 1, + la_token: self.la_token, + } + } + + pub fn check_next_elem(&self) -> Option<&'a RuleElem> { + if self.dot_pos < self.rule.rhs.len() { + Some(&self.rule.rhs[self.dot_pos]) + } else { + None + } + } + + pub fn check_next_elems<'b>(&'b self) -> Vec> { + let mut next_elems = 
Vec::from(&self.rule.rhs[self.dot_pos..]); + next_elems.push(self.la_token.clone()); + next_elems + } +} + +#[derive(Clone)] +pub struct LR1ItemSet<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub items: HashSet>, + ruleset: &'a RuleSet, + first_set: &'b FirstSet<'a, T, R>, +} + +impl<'a, 'b, T, R> Debug for LR1ItemSet<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if f.alternate() { + write!(f, "{:#?}", self.items) + } else { + write!(f, "{:?}", self.items) + } + } +} + +impl<'a, 'b, T, R> From<(&'a RuleSet, &'b FirstSet<'a, T, R>)> for LR1ItemSet<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from((ruleset, first_set): (&'a RuleSet, &'b FirstSet<'a, T, R>)) -> Self { + LR1ItemSet { + items: HashSet::new(), + ruleset, + first_set, + } + } +} + +impl<'a, 'b, T, R> PartialEq for LR1ItemSet<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn eq(&self, other: &Self) -> bool { + self.items == other.items + } +} + +impl <'a, 'b, T, R> Eq for LR1ItemSet<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{} + +impl<'a, 'b, T, R> LR1ItemSet<'a, 'b, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn init(mut self, rule: &'a Rule) -> Self { + self.items = HashSet::from([LR1Item::from((rule, &RuleElem::EOF))]); + self + } + + pub fn gen_next_sets(&mut self) -> impl Iterator, LR1ItemSet<'a, 'b, T, R>)> { + self.expand(); + + let mut next_set_candidates = HashMap::new(); + self.items + .iter() + .filter_map(|item| item.check_next_elem().map(|nelem| (nelem, item))) + .for_each(|(nelem, item) | { + next_set_candidates + .entry(nelem) + .or_insert_with(HashSet::new) + .insert(item.gen_next()); + }); + + next_set_candidates + .into_iter() + .map(|(cond, items)| { + let items = items.into_iter().collect(); + (cond, LR1ItemSet { items, ruleset: self.ruleset, first_set: self.first_set }) + }) + } + + fn expand(&mut self) { + let mut modified = true; + while modified { + modified = false; + let new_expaned = self.items + .iter() + .flat_map(|item| self.expand_once(item)) + .flatten() + .collect::>(); + for item in new_expaned { + modified |= self.items.insert(item); + } + } + } + + fn expand_once(&self, item: &LR1Item<'a, T, R>) -> Option>> { + if let Some(nonterm@RuleElem::NonTerm(..)) = item.check_next_elem() { + Some(self.ruleset + .find_rule(nonterm) + .into_iter() + .flat_map(|rule| { + let next_elems = item.check_next_elems(); + self.first_set + .get_by(&next_elems[1..]) + .into_iter() + .map(move |la_token| LR1Item::from((rule, la_token))) + })) + } else { + None + } + } +} + +#[cfg(test)] +mod test { + // TODO +} diff --git a/crates/parse_lr_common/src/driver.rs b/crates/parse_lr_common/src/driver.rs new file mode 100644 index 0000000..48c8a1f --- /dev/null +++ b/crates/parse_lr_common/src/driver.rs @@ -0,0 +1,87 @@ +use copager_cfg::token::{TokenTag, Token}; +use copager_cfg::rule::{RuleElem, RuleTag}; +use copager_parse::ParseEvent; + +use crate::error::LRError; +use crate::table::{LRAction, LRTable}; + +pub struct LRDriver<'table, T, R> +where + T: TokenTag, + R: RuleTag, +{ + table: &'table LRTable, + stack: Vec, + accepted: bool, +} + +impl<'table, T, R> From<&'table LRTable> for LRDriver<'table, T, R> +where + T: TokenTag, + R: RuleTag, +{ + fn from(table: &'table LRTable) -> Self { + LRDriver { + table, + stack: vec![0], + accepted: false, + } + } +} + +impl<'table, 'input, T, R> LRDriver<'table, T, R> +where + T: TokenTag, + R: RuleTag, +{ + pub fn reset(&mut self) { + self.stack = 
vec![0];
+    }
+
+    pub gen fn consume(&mut self, token: Option<Token<'input, T>>) -> ParseEvent<'input, T, R> {
+        loop {
+            let top = self.stack[self.stack.len() - 1];
+            let action = self.table.get_action(top, token);
+            match (action, token) {
+                (LRAction::Shift(new_state), Some(token)) => {
+                    self.stack.push(*new_state);
+                    yield ParseEvent::Read(token);
+                    break;
+                },
+                (LRAction::Reduce(rule), _) => {
+                    let tag = rule.tag.unwrap();
+                    let lhs = lhs_as_str(&rule.lhs);
+                    let rhs_len = rule.rhs.len();
+                    self.stack.truncate(self.stack.len() - rhs_len);
+                    self.stack.push(self.table.get_goto(self.stack[self.stack.len()-1], lhs).unwrap());
+                    yield ParseEvent::Parse { rule: tag, len: rhs_len };
+                },
+                (LRAction::Accept, _) => {
+                    self.accepted = true;
+                    return;
+                }
+                (LRAction::None, Some(token)) => {
+                    yield ParseEvent::Err(LRError::new_unexpected_token(token).into());
+                    return;
+                }
+                (LRAction::None, None) => {
+                    yield ParseEvent::Err(LRError::new_unexpected_eof().into());
+                    return;
+                }
+                _ => unreachable!(),
+            }
+        }
+    }
+
+    pub fn accepted(&self) -> bool {
+        self.accepted
+    }
+}
+
+fn lhs_as_str<T: TokenTag>(lhs: &RuleElem<T>) -> &str {
+    if let RuleElem::NonTerm(nt) = lhs {
+        nt.as_str()
+    } else {
+        unreachable!()
+    }
+}
diff --git a/crates/parse_lr_common/src/error.rs b/crates/parse_lr_common/src/error.rs
new file mode 100644
index 0000000..ec8f0ba
--- /dev/null
+++ b/crates/parse_lr_common/src/error.rs
@@ -0,0 +1,48 @@
+use thiserror::Error;
+
+use copager_core::error::ParseError;
+use copager_cfg::token::{TokenTag, Token};
+use copager_cfg::rule::RuleTag;
+
+use crate::table::LRAction;
+
+#[derive(Debug, Error)]
+pub enum LRError {
+    #[error("Conflict occurred at [{action}]")]
+    Conflict {
+        action: String,
+    },
+    #[error("Unexpected token {actual:?} found")]
+    UnexpectedToken {
+        actual: String,
+    },
+    #[error("Unexpected EOF")]
+    UnexpectedEOF,
+}
+
+impl LRError {
+    pub fn new_conflict<T, R>(action: &LRAction<T, R>) -> ParseError
+    where
+        T: TokenTag,
+        R: RuleTag,
+    {
+        let action = match action {
+            LRAction::Shift(state) => format!("Shift({})", state),
+            LRAction::Reduce(rule) => format!("Reduce({})", rule),
+            LRAction::Accept => format!("Accept"),
+            _ => unimplemented!(),
+        };
+        ParseError::from(LRError::Conflict { action })
+    }
+
+    pub fn new_unexpected_token<'input, T: TokenTag>(expected: Token<'input, T>) -> ParseError {
+        let err = LRError::UnexpectedToken {
+            actual: format!("{:?}", expected.kind),
+        };
+        ParseError::from(err).with(expected)
+    }
+
+    pub fn new_unexpected_eof() -> ParseError {
+        ParseError::from(LRError::UnexpectedEOF)
+    }
+}
diff --git a/crates/parse_lr_common/src/lib.rs b/crates/parse_lr_common/src/lib.rs
new file mode 100644
index 0000000..ef49f00
--- /dev/null
+++ b/crates/parse_lr_common/src/lib.rs
@@ -0,0 +1,15 @@
+#![feature(gen_blocks)]
+
+mod automaton;
+mod error;
+mod driver;
+mod table;
+
+// Common LR components
+pub use table::{LRAction, LRTable, LRTableBuilder};
+pub use driver::LRDriver;
+
+// LR automata
+pub use automaton::lr0;
+pub use automaton::lr1;
+pub use automaton::lalr1;
diff --git a/crates/parse_lr_common/src/table.rs b/crates/parse_lr_common/src/table.rs
new file mode 100644
index 0000000..bc1be23
--- /dev/null
+++ b/crates/parse_lr_common/src/table.rs
@@ -0,0 +1,136 @@
+use std::collections::HashMap;
+
+use serde::{Serialize, Deserialize};
+
+use copager_cfg::token::{Token, TokenTag};
+use copager_cfg::rule::{Rule, RuleElem, RuleTag};
+
+use crate::automaton::Automaton;
+use crate::error::LRError;
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum LRAction<T, R>
+where
+    T: TokenTag,
+    R: RuleTag,
+{
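    // One cell of the parse table: shift to a state, reduce by a rule, accept,
    // or reject (`None`). `Reduce` carries the whole `Rule`, so the driver can
    // read the rule's tag and RHS length directly when the action fires.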
+    Shift(usize),
+    Reduce(Rule<T, R>),
+    Accept,
+    None,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LRTable<T, R>
+where
+    T: TokenTag,
+    R: RuleTag,
+{
+    pub action_table: Vec<HashMap<T, LRAction<T, R>>>,
+    pub eof_action_table: Vec<LRAction<T, R>>,
+    pub goto_table: Vec<HashMap<String, usize>>,
+}
+
+impl<T, R> LRTable<T, R>
+where
+    T: TokenTag,
+    R: RuleTag,
+{
+    pub fn get_action(&self, state: usize, token: Option<Token<'_, T>>) -> &LRAction<T, R> {
+        if let Some(token) = token {
+            return self.action_table[state].get(&token.kind).unwrap_or(&LRAction::None)
+        } else {
+            return &self.eof_action_table[state]
+        }
+    }
+
+    pub fn get_goto(&self, state: usize, nonterm: &str) -> Option<usize> {
+        self.goto_table[state].get(nonterm).copied()
+    }
+}
+
+#[derive(Debug)]
+pub struct LRTableBuilder<T, R>
+where
+    T: TokenTag,
+    R: RuleTag,
+{
+    pub action_table: Vec<HashMap<T, LRAction<T, R>>>,
+    pub eof_action_table: Vec<LRAction<T, R>>,
+    pub goto_table: Vec<HashMap<String, usize>>,
+}
+
+impl<'a: 'b, 'b, T, R> LRTableBuilder<T, R>
+where
+    T: TokenTag + 'a,
+    R: RuleTag,
+{
+    pub fn from(automaton: &'b impl Automaton<'a, 'b, T>) -> Self {
+        let size = automaton.len();
+
+        // Initialize one row per automaton state
+        let mut action_table: Vec<HashMap<T, LRAction<T, R>>> = Vec::with_capacity(size);
+        let mut eof_action_table = Vec::with_capacity(size);
+        let mut goto_table = Vec::with_capacity(size);
+        for _ in 0..size {
+            action_table.push(HashMap::new());
+            eof_action_table.push(LRAction::None);
+            goto_table.push(HashMap::new());
+        }
+
+        // Populate the tables from the automaton's edges
+        for (from, to, elem) in automaton.edges() {
+            match elem {
+                RuleElem::Term(token) => {
+                    action_table[*from].insert(*token, LRAction::Shift(*to));
+                }
+                RuleElem::NonTerm(name) => {
+                    goto_table[*from].insert(name.clone(), *to);
+                },
+                _ => {}
+            }
+        }
+
+        LRTableBuilder {
+            action_table,
+            eof_action_table,
+            goto_table,
+        }
+    }
+
+    pub fn set(&mut self, state: usize, token: Option<T>, action: LRAction<T, R>) {
+        if let Some(token) = token {
+            self.action_table[state].insert(token, action);
+        } else {
+            self.eof_action_table[state] = action;
+        }
+    }
+
+    pub fn try_set(&mut self, state: usize, token: Option<T>, action: LRAction<T, R>) -> anyhow::Result<()> {
+        if let Some(token) = token {
+            if self.action_table[state].contains_key(&token) {
+                return Err(LRError::new_conflict(&action).into());
+            }
+            self.action_table[state].insert(token, action);
+        } else {
+            if self.eof_action_table[state] != LRAction::None {
+                return Err(LRError::new_conflict(&action).into());
+            }
+            self.eof_action_table[state] = action;
+        }
+        Ok(())
+    }
+
+    pub fn build(self) -> LRTable<T, R> {
+        LRTable {
+            action_table: self.action_table,
+            eof_action_table: self.eof_action_table,
+            goto_table: self.goto_table,
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    // TODO
+}
diff --git a/crates/parse_lr_lalr1/Cargo.toml b/crates/parse_lr_lalr1/Cargo.toml
new file mode 100644
index 0000000..133e361
--- /dev/null
+++ b/crates/parse_lr_lalr1/Cargo.toml
@@ -0,0 +1,24 @@
+cargo-features = ["edition2024"]
+
+[package]
+name = "copager_parse_lr_lalr1"
+version = "0.3.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+copager_cfg = { path = "../cfg" }
+copager_lex = { path = "../lex" }
+copager_parse = { path = "../parse" }
+copager_parse_common = { path = "../parse_common" }
+copager_parse_lr_common = { path = "../parse_lr_common" }
+copager_utils = { path = "../utils" }
+
+[dev-dependencies]
+copager_core = { path = "../core" }
+copager_lex = { path = "../lex", features = ["derive"] }
+copager_lex_regex = { path = "../lex_regex" }
+copager_parse = { path = "../parse", features = ["derive"] }
+copager_ir_void = { path = "../ir_void" }
diff --git 
a/crates/parse_lr_lalr1/src/lib.rs b/crates/parse_lr_lalr1/src/lib.rs new file mode 100644 index 0000000..553f232 --- /dev/null +++ b/crates/parse_lr_lalr1/src/lib.rs @@ -0,0 +1,137 @@ +#![feature(gen_blocks)] + +use std::marker::PhantomData; + +use serde::{Serialize, Deserialize}; + +use copager_cfg::token::{Token, TokenTag}; +use copager_cfg::rule::{Rule, RuleElem, RuleTag}; +use copager_lex::LexSource; +use copager_parse::{BaseParser, ParseSource, ParseEvent}; +use copager_parse_common::rule::FirstSet; +use copager_parse_lr_common::lr1::LR1DFA; +use copager_parse_lr_common::lalr1::item::LALR1Item; +use copager_parse_lr_common::lalr1::LALR1DFA; +use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder}; +use copager_utils::cache::Cacheable; + +pub struct LALR1 +where + T: TokenTag, + R: RuleTag +{ + table: LRTable, +} + +impl BaseParser for LALR1 +where + Sl: LexSource, + Sp: ParseSource, +{ + fn try_from((_, source_p): (Sl, Sp)) -> anyhow::Result { + let table = LALR1Table::try_from(source_p)?; + Ok(LALR1 { table }) + } + + gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> + where + Il: Iterator>, + { + let mut driver = LRDriver::from(&self.table); + while !driver.accepted() { + for event in driver.consume(lexer.next()).collect::>() { + yield event; + } + } + } +} + +impl Cacheable<(Sl, Sp)> for LALR1 +where + Sl: LexSource, + Sl::Tag: Serialize + for<'de> Deserialize<'de>, + Sp: ParseSource, + Sp::Tag: Serialize + for<'de> Deserialize<'de>, +{ + type Cache = LRTable; + + fn new((_, source_p): (Sl, Sp)) -> anyhow::Result { + let table = LALR1Table::try_from(source_p)?; + Ok(table) + } + + fn restore(table: Self::Cache) -> Self { + LALR1 { table } + } +} + +pub struct LALR1Table +where + T: TokenTag, + R: RuleTag +{ + _phantom_t: PhantomData, + _phantom_r: PhantomData, +} + +impl LALR1Table +where + T: TokenTag, + R: RuleTag, +{ + fn try_from(source_p: Sp) -> anyhow::Result> + where + Sp: ParseSource, + { + // 最上位規則を追加して RuleSet を更新 + let mut ruleset = source_p.into_ruleset(); + let top_dummy = Rule::new( + None, + RuleElem::new_nonterm("__top_dummy"), + vec![RuleElem::new_nonterm(&ruleset.top)], + ); + ruleset.update_top(top_dummy.clone()); + + // First 集合作成 + let first_set = FirstSet::from(&ruleset); + + // LALR(1) オートマトン作成 + let dfa = LR1DFA::from((&ruleset, &first_set)); + let dfa = LALR1DFA::from(dfa); + + // LALR(1) 構文解析表作成 + let mut builder = LRTableBuilder::from(&dfa); + for node in &dfa.nodes { + for (rule, la_tokens) in node.find_all_by(is_lalr1_reduce_state) { + // A -> α β . [la_token] を含む場合,la_token 列に対して Reduce をマーク + for la_token in la_tokens { + match la_token { + RuleElem::Term(term) => { + builder.try_set(node.id, Some(*term), LRAction::Reduce(rule.clone()))?; + } + RuleElem::EOF => { + builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?; + } + _ => {} + } + } + + // S -> Top . 
を含む場合,EOF 列に対して Accept をマーク + if rule == &top_dummy { + builder.set(node.id, None, LRAction::Accept); + } + } + } + let table = builder.build(); + + Ok(table) + } +} + +fn is_lalr1_reduce_state(item: &&LALR1Item) -> bool +where + T: TokenTag, + R: RuleTag, +{ + item.check_next_elem().is_none() +} diff --git a/crates/parse_lr_lalr1/tests/simple.rs b/crates/parse_lr_lalr1/tests/simple.rs new file mode 100644 index 0000000..0776feb --- /dev/null +++ b/crates/parse_lr_lalr1/tests/simple.rs @@ -0,0 +1,100 @@ +use copager_core::{Language, Processor}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::LexSource; +use copager_lex_regex::RegexLexer; +use copager_parse::ParseSource; +use copager_parse_lr_lalr1::LALR1; +use copager_ir_void::Void; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum TestToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum TestRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type TestLanguage = Language; +type TestLexer = RegexLexer; +type TestParser = LALR1; +type TestProcessor = Processor; + +#[test] +fn simple_success() { + const OK_INPUTS: [&str; 10] = [ + "10", + "10 + 20", + "10 - 20", + "10 * 20", + "10 / 20", + "10 + 20 * 30 - 40", + "(10)", + "((((10))))", + "10 * (20 - 30)", + "((10 + 20) * (30 / 40)) - 50", + ]; + + let processor = TestProcessor::new() + .build_lexer() + .unwrap() + .build_parser() + .unwrap(); + + for input in &OK_INPUTS { + println!("input: {}", input); + processor.process::(input).unwrap(); + } +} + +#[test] +fn simple_failure() { + const ERR_INPUTS: [&str; 7] = [ + "()", + "(10 -", + "10 +", + "*", + "10 20 + 30", + "10 + 20 * 30 / 40 (", + "(((10))", + ]; + + let processor = TestProcessor::new() + .build_lexer() + .unwrap() + .build_parser() + .unwrap(); + + for input in &ERR_INPUTS { + assert!(processor.process::(input).is_err(), "input: {}", input); + } +} diff --git a/crates/parse_lr1/Cargo.toml b/crates/parse_lr_lr0/Cargo.toml similarity index 65% rename from crates/parse_lr1/Cargo.toml rename to crates/parse_lr_lr0/Cargo.toml index 3d628d4..2be284e 100644 --- a/crates/parse_lr1/Cargo.toml +++ b/crates/parse_lr_lr0/Cargo.toml @@ -1,23 +1,23 @@ cargo-features = ["edition2024"] [package] -name = "copager_parse_lr1" -version = "0.2.0" +name = "copager_parse_lr_lr0" +version = "0.3.0" edition = "2024" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } -serde = { workspace = true } -itertools = "0.12.1" -copager_core = { path = "../core" } +serde = { workspace = true, features = ["derive"] } copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } +copager_parse_lr_common = { path = "../parse_lr_common" } copager_utils = { path = "../utils" } [dev-dependencies] +copager_core = { path = "../core" } copager_lex = { path = "../lex", features = ["derive"] } -copager_lex_regex = { path = "../lex_regex" } +copager_lex_regex = { path = "../lex_regex" } 
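# (dev-dependencies are exercised only by the integration test in tests/simple.rs)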
copager_parse = { path = "../parse", features = ["derive"] } -copager_parse_lr1 = { path = "../parse_lr1" } +copager_ir_void = { path = "../ir_void" } diff --git a/crates/parse_lr_lr0/src/lib.rs b/crates/parse_lr_lr0/src/lib.rs new file mode 100644 index 0000000..d4f2d9f --- /dev/null +++ b/crates/parse_lr_lr0/src/lib.rs @@ -0,0 +1,128 @@ +#![feature(gen_blocks)] + +use std::marker::PhantomData; + +use serde::{Serialize, Deserialize}; + +use copager_cfg::token::{Token, TokenTag}; +use copager_cfg::rule::{Rule, RuleElem, RuleTag}; +use copager_lex::LexSource; +use copager_parse::{BaseParser, ParseSource, ParseEvent}; +use copager_parse_lr_common::lr0::item::LR0Item; +use copager_parse_lr_common::lr0::LR0DFA; +use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder}; +use copager_utils::cache::Cacheable; + +pub struct LR0 +where + T: TokenTag, + R: RuleTag +{ + table: LRTable, +} + +impl BaseParser for LR0 +where + Sl: LexSource, + Sp: ParseSource, +{ + fn try_from((source_l, source_p): (Sl, Sp)) -> anyhow::Result { + let table = LR0Table::try_from(source_l, source_p)?; + Ok(LR0 { table }) + } + + gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> + where + Il: Iterator>, + { + let mut driver = LRDriver::from(&self.table); + while !driver.accepted() { + for event in driver.consume(lexer.next()).collect::>() { + yield event; + } + } + } +} + +impl Cacheable<(Sl, Sp)> for LR0 +where + Sl: LexSource, + Sl::Tag: Serialize + for<'de> Deserialize<'de>, + Sp: ParseSource, + Sp::Tag: Serialize + for<'de> Deserialize<'de>, +{ + type Cache = LRTable; + + fn new((source_l, source_p): (Sl, Sp)) -> anyhow::Result { + let table = LR0Table::try_from(source_l, source_p)?; + Ok(table) + } + + fn restore(table: Self::Cache) -> Self { + LR0 { table } + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct LR0Table +where + T: TokenTag, + R: RuleTag +{ + _phantom_t: PhantomData, + _phantom_r: PhantomData, +} + +impl LR0Table +where + T: TokenTag, + R: RuleTag, +{ + fn try_from(source_l: Sl, source_p: Sp) -> anyhow::Result> + where + Sl: LexSource, + Sp: ParseSource, + { + // 最上位規則を追加して RuleSet を更新 + let mut ruleset = source_p.into_ruleset(); + let top_dummy = Rule::new( + None, + RuleElem::new_nonterm("__top_dummy"), + vec![RuleElem::new_nonterm(&ruleset.top)], + ); + ruleset.update_top(top_dummy.clone()); + + // LR(0) オートマトン作成 + let dfa = LR0DFA::from(&ruleset); + + // LR(0) 構文解析表作成 + let mut builder = LRTableBuilder::from(&dfa); + for node in dfa.nodes { + let node = node.read().unwrap(); + for rule in node.find_all_by(is_lr0_reduce_state) { + // S -> Top . を含む場合,EOF 列に対して Accept をマーク + if rule == &top_dummy { + builder.set(node.id, None, LRAction::Accept); + continue; + } + + // A -> α β . 
を含む場合 全列に Reduce をマーク + builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?; + for token in source_l.iter() { + builder.try_set(node.id, Some(token), LRAction::Reduce(rule.clone()))?; + } + } + } + let table = builder.build(); + + Ok(table) + } +} + +fn is_lr0_reduce_state(item: &&LR0Item) -> bool +where + T: TokenTag, + R: RuleTag, +{ + item.check_next_elem().is_none() +} diff --git a/crates/parse_lr_lr0/tests/simple.rs b/crates/parse_lr_lr0/tests/simple.rs new file mode 100644 index 0000000..238b61d --- /dev/null +++ b/crates/parse_lr_lr0/tests/simple.rs @@ -0,0 +1,90 @@ +use copager_core::{Language, Processor}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::LexSource; +use copager_lex_regex::RegexLexer; +use copager_parse::ParseSource; +use copager_parse_lr_lr0::LR0; +use copager_ir_void::Void; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum TestToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum TestRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type TestLanguage = Language; +type TestLexer = RegexLexer; +type TestParser = LR0; +type TestProcessor = Processor; + +#[test] +fn simple_success() { + const OK_INPUTS: [&str; 8] = [ + "10", + "10 + 20", + "10 - 20", + "10 + 20 + 30", + "(10)", + "((((10))))", + "10 + (20 - 30)", + "(10 + 20) - 30", + ]; + + let processor = TestProcessor::new() + .build_lexer() + .unwrap() + .build_parser() + .unwrap(); + + for input in &OK_INPUTS { + println!("input: {}", input); + processor.process::(input).unwrap(); + } +} + +#[test] +fn simple_failure() { + const ERR_INPUTS: [&str; 7] = [ + "()", + "(10 -", + "10 +", + "+", + "10 20 + 30", + "10 + 20 - 30 (", + "(((10))", + ]; + + let processor = TestProcessor::new() + .build_lexer() + .unwrap() + .build_parser() + .unwrap(); + + for input in &ERR_INPUTS { + assert!(processor.process::(input).is_err(), "input: {}", input); + } +} diff --git a/crates/parse_lr_lr1/Cargo.toml b/crates/parse_lr_lr1/Cargo.toml new file mode 100644 index 0000000..9984ed6 --- /dev/null +++ b/crates/parse_lr_lr1/Cargo.toml @@ -0,0 +1,24 @@ +cargo-features = ["edition2024"] + +[package] +name = "copager_parse_lr_lr1" +version = "0.3.0" +edition = "2024" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +serde = { workspace = true, features = ["derive"] } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } +copager_parse_common = { path = "../parse_common" } +copager_parse_lr_common = { path = "../parse_lr_common" } +copager_utils = { path = "../utils" } + +[dev-dependencies] +copager_core = { path = "../core" } +copager_lex = { path = "../lex", features = ["derive"] } +copager_lex_regex = { path = "../lex_regex" } +copager_parse = { path = "../parse", features = ["derive"] } +copager_ir_void = { path = "../ir_void" } diff --git a/crates/parse_lr_lr1/src/lib.rs b/crates/parse_lr_lr1/src/lib.rs new file mode 100644 index 0000000..f92961e --- /dev/null +++ b/crates/parse_lr_lr1/src/lib.rs @@ -0,0 +1,134 @@ +#![feature(gen_blocks)] + +use 
diff --git a/crates/parse_lr_lr1/Cargo.toml b/crates/parse_lr_lr1/Cargo.toml
new file mode 100644
index 0000000..9984ed6
--- /dev/null
+++ b/crates/parse_lr_lr1/Cargo.toml
@@ -0,0 +1,24 @@
+cargo-features = ["edition2024"]
+
+[package]
+name = "copager_parse_lr_lr1"
+version = "0.3.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+copager_cfg = { path = "../cfg" }
+copager_lex = { path = "../lex" }
+copager_parse = { path = "../parse" }
+copager_parse_common = { path = "../parse_common" }
+copager_parse_lr_common = { path = "../parse_lr_common" }
+copager_utils = { path = "../utils" }
+
+[dev-dependencies]
+copager_core = { path = "../core" }
+copager_lex = { path = "../lex", features = ["derive"] }
+copager_lex_regex = { path = "../lex_regex" }
+copager_parse = { path = "../parse", features = ["derive"] }
+copager_ir_void = { path = "../ir_void" }
diff --git a/crates/parse_lr_lr1/src/lib.rs b/crates/parse_lr_lr1/src/lib.rs
new file mode 100644
index 0000000..f92961e
--- /dev/null
+++ b/crates/parse_lr_lr1/src/lib.rs
@@ -0,0 +1,134 @@
+#![feature(gen_blocks)]
+
+use std::marker::PhantomData;
+
+use serde::{Serialize, Deserialize};
+
+use copager_cfg::token::{Token, TokenTag};
+use copager_cfg::rule::{Rule, RuleElem, RuleTag};
+use copager_lex::LexSource;
+use copager_parse::{BaseParser, ParseSource, ParseEvent};
+use copager_parse_common::rule::FirstSet;
+use copager_parse_lr_common::lr1::item::LR1Item;
+use copager_parse_lr_common::lr1::LR1DFA;
+use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder};
+use copager_utils::cache::Cacheable;
+
+pub struct LR1<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    table: LRTable<T, R>,
+}
+
+impl<Sl, Sp> BaseParser<Sl, Sp> for LR1<Sl::Tag, Sp::Tag>
+where
+    Sl: LexSource,
+    Sp: ParseSource<Sl::Tag>,
+{
+    fn try_from((_, source_p): (Sl, Sp)) -> anyhow::Result<Self> {
+        let table = LR1Table::try_from(source_p)?;
+        Ok(LR1 { table })
+    }
+
+    gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag>
+    where
+        Il: Iterator<Item = Token<'input, Sl::Tag>>,
+    {
+        let mut driver = LRDriver::from(&self.table);
+        while !driver.accepted() {
+            for event in driver.consume(lexer.next()).collect::<Vec<_>>() {
+                yield event;
+            }
+        }
+    }
+}
+
+impl<Sl, Sp> Cacheable<(Sl, Sp)> for LR1<Sl::Tag, Sp::Tag>
+where
+    Sl: LexSource,
+    Sl::Tag: Serialize + for<'de> Deserialize<'de>,
+    Sp: ParseSource<Sl::Tag>,
+    Sp::Tag: Serialize + for<'de> Deserialize<'de>,
+{
+    type Cache = LRTable<Sl::Tag, Sp::Tag>;
+
+    fn new((_, source_p): (Sl, Sp)) -> anyhow::Result<Self::Cache> {
+        let table = LR1Table::try_from(source_p)?;
+        Ok(table)
+    }
+
+    fn restore(table: Self::Cache) -> Self {
+        LR1 { table }
+    }
+}
+
+pub struct LR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    _phantom_t: PhantomData<T>,
+    _phantom_r: PhantomData<R>,
+}
+
+impl<T, R> LR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn try_from<Sp>(source_p: Sp) -> anyhow::Result<LRTable<T, R>>
+    where
+        Sp: ParseSource<T, Tag = R>,
+    {
+        // Add a dummy top-level rule and update the RuleSet
+        let mut ruleset = source_p.into_ruleset();
+        let top_dummy = Rule::new(
+            None,
+            RuleElem::new_nonterm("__top_dummy"),
+            vec![RuleElem::new_nonterm(&ruleset.top)],
+        );
+        ruleset.update_top(top_dummy.clone());
+
+        // Build the First sets
+        let first_set = FirstSet::from(&ruleset);
+
+        // Build the LR(1) automaton
+        let dfa = LR1DFA::from((&ruleset, &first_set));
+
+        // Build the LR(1) parse table
+        let mut builder = LRTableBuilder::from(&dfa);
+        for node in &dfa.nodes {
+            let node = node.read().unwrap();
+            for (rule, la_token) in node.find_all_by(is_lr1_reduce_state) {
+                // For an item A -> α β . [la_token], mark Reduce in the la_token column
+                match la_token {
+                    RuleElem::Term(term) => {
+                        builder.try_set(node.id, Some(*term), LRAction::Reduce(rule.clone()))?;
+                    }
+                    RuleElem::EOF => {
+                        builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?;
+                    }
+                    _ => {}
+                }
+
+                // For an item S -> Top ., mark Accept in the EOF column
+                if rule == &top_dummy {
+                    builder.set(node.id, None, LRAction::Accept);
+                }
+            }
+        }
+        let table = builder.build();
+
+        Ok(table)
+    }
+}
+
+fn is_lr1_reduce_state<T, R>(item: &&LR1Item<T, R>) -> bool
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    item.check_next_elem().is_none()
+}
diff --git a/crates/parse_lr_lr1/tests/simple.rs b/crates/parse_lr_lr1/tests/simple.rs
new file mode 100644
index 0000000..cb90c8a
--- /dev/null
+++ b/crates/parse_lr_lr1/tests/simple.rs
@@ -0,0 +1,100 @@
+use copager_core::{Language, Processor};
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleTag, Rule, RuleElem};
+use copager_lex::LexSource;
+use copager_lex_regex::RegexLexer;
+use copager_parse::ParseSource;
+use copager_parse_lr_lr1::LR1;
+use copager_ir_void::Void;
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+enum TestToken {
+    #[default]
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\*")]
+    Mul,
+    #[token(text = r"/")]
+    Div,
+    #[token(text = r"\(")]
+    BracketL,
+    #[token(text = r"\)")]
+    BracketR,
+    #[token(text = r"[1-9][0-9]*")]
+    Num,
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+enum TestRule {
+    #[default]
+    #[rule("<expr> ::= <expr> Plus <term>")]
+    #[rule("<expr> ::= <expr> Minus <term>")]
+    #[rule("<expr> ::= <term>")]
+    Expr,
+    #[rule("<term> ::= <term> Mul <num>")]
+    #[rule("<term> ::= <term> Div <num>")]
+    #[rule("<term> ::= <num>")]
+    Term,
+    #[rule("<num> ::= BracketL <expr> BracketR")]
+    #[rule("<num> ::= Num")]
+    Num,
+}
+
+type TestLanguage = Language<TestToken, TestRule>;
+type TestLexer = RegexLexer<TestToken>;
+type TestParser = LR1<TestToken, TestRule>;
+type TestProcessor = Processor<TestLanguage, TestLexer, TestParser>;
+
+#[test]
+fn simple_success() {
+    const OK_INPUTS: [&str; 10] = [
+        "10",
+        "10 + 20",
+        "10 - 20",
+        "10 * 20",
+        "10 / 20",
+        "10 + 20 * 30 - 40",
+        "(10)",
+        "((((10))))",
+        "10 * (20 - 30)",
+        "((10 + 20) * (30 / 40)) - 50",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &OK_INPUTS {
+        println!("input: {}", input);
+        processor.process::<Void>(input).unwrap();
+    }
+}
+
+#[test]
+fn simple_failure() {
+    const ERR_INPUTS: [&str; 7] = [
+        "()",
+        "(10 -",
+        "10 +",
+        "*",
+        "10 20 + 30",
+        "10 + 20 * 30 / 40 (",
+        "(((10))",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &ERR_INPUTS {
+        assert!(processor.process::<Void>(input).is_err(), "input: {}", input);
+    }
+}
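For reference, the FirstSet used above is the textbook fixed-point computation: FIRST(A) accumulates the terminals that can begin a string derived from A until nothing changes. A minimal standalone sketch, with illustrative types rather than copager's FirstSet API (nonterminals are uppercase strings, "" stands for epsilon):

use std::collections::{BTreeMap, BTreeSet};

// Nonterminals are uppercase strings; "" plays the role of epsilon.
type Sym = &'static str;

fn is_nonterm(s: Sym) -> bool {
    s.chars().next().is_some_and(|c| c.is_ascii_uppercase())
}

// Fixed-point FIRST computation over (lhs, rhs) rules.
fn first_sets(rules: &[(Sym, Vec<Sym>)]) -> BTreeMap<Sym, BTreeSet<Sym>> {
    let mut first: BTreeMap<Sym, BTreeSet<Sym>> = BTreeMap::new();
    for (lhs, _) in rules {
        first.entry(*lhs).or_default();
    }
    loop {
        let mut changed = false;
        for (lhs, rhs) in rules {
            let mut add = BTreeSet::new();
            let mut nullable = true; // empty RHS means the rule derives epsilon
            for &sym in rhs {
                if is_nonterm(sym) {
                    let fs = first.get(sym).cloned().unwrap_or_default();
                    let has_eps = fs.contains("");
                    add.extend(fs.into_iter().filter(|t| !t.is_empty()));
                    if !has_eps {
                        nullable = false;
                        break;
                    }
                } else {
                    add.insert(sym);
                    nullable = false;
                    break;
                }
            }
            if nullable {
                add.insert("");
            }
            let entry = first.entry(*lhs).or_default();
            let before = entry.len();
            entry.extend(add);
            changed |= entry.len() != before;
        }
        if !changed {
            return first;
        }
    }
}

fn main() {
    // The arithmetic test grammar: E -> E + T | T, T -> T * N | N, N -> ( E ) | num
    let rules: Vec<(Sym, Vec<Sym>)> = vec![
        ("E", vec!["E", "+", "T"]),
        ("E", vec!["T"]),
        ("T", vec!["T", "*", "N"]),
        ("T", vec!["N"]),
        ("N", vec!["(", "E", ")"]),
        ("N", vec!["num"]),
    ];
    for (nt, fs) in first_sets(&rules) {
        println!("FIRST({}) = {:?}", nt, fs); // all three come out as {"(", "num"}
    }
}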
diff --git a/crates/parse_lr_slr1/Cargo.toml b/crates/parse_lr_slr1/Cargo.toml
new file mode 100644
index 0000000..493f49a
--- /dev/null
+++ b/crates/parse_lr_slr1/Cargo.toml
@@ -0,0 +1,24 @@
+cargo-features = ["edition2024"]
+
+[package]
+name = "copager_parse_lr_slr1"
+version = "0.3.0"
+edition = "2024"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+copager_cfg = { path = "../cfg" }
+copager_lex = { path = "../lex" }
+copager_parse = { path = "../parse" }
+copager_parse_common = { path = "../parse_common" }
+copager_parse_lr_common = { path = "../parse_lr_common" }
+copager_utils = { path = "../utils" }
+
+[dev-dependencies]
+copager_core = { path = "../core" }
+copager_lex = { path = "../lex", features = ["derive"] }
+copager_lex_regex = { path = "../lex_regex" }
+copager_parse = { path = "../parse", features = ["derive"] }
+copager_ir_void = { path = "../ir_void" }
diff --git a/crates/parse_lr_slr1/src/lib.rs b/crates/parse_lr_slr1/src/lib.rs
new file mode 100644
index 0000000..d7f23dd
--- /dev/null
+++ b/crates/parse_lr_slr1/src/lib.rs
@@ -0,0 +1,146 @@
+#![feature(gen_blocks)]
+
+use std::marker::PhantomData;
+
+use serde::{Serialize, Deserialize};
+
+use copager_cfg::token::{Token, TokenTag};
+use copager_cfg::rule::{Rule, RuleElem, RuleTag};
+use copager_lex::LexSource;
+use copager_parse::{BaseParser, ParseSource, ParseEvent};
+use copager_parse_common::rule::FollowSet;
+use copager_parse_lr_common::lr0::item::LR0Item;
+use copager_parse_lr_common::lr0::LR0DFA;
+use copager_parse_lr_common::{LRDriver, LRAction, LRTable, LRTableBuilder};
+use copager_utils::cache::Cacheable;
+
+pub struct SLR1<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    table: LRTable<T, R>,
+}
+
+impl<Sl, Sp> BaseParser<Sl, Sp> for SLR1<Sl::Tag, Sp::Tag>
+where
+    Sl: LexSource,
+    Sp: ParseSource<Sl::Tag>,
+{
+    fn try_from((_, source_p): (Sl, Sp)) -> anyhow::Result<Self> {
+        let table = SLR1Table::try_from(source_p)?;
+        Ok(SLR1 { table })
+    }
+
+    gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag>
+    where
+        Il: Iterator<Item = Token<'input, Sl::Tag>>,
+    {
+        let mut driver = LRDriver::from(&self.table);
+        while !driver.accepted() {
+            for event in driver.consume(lexer.next()).collect::<Vec<_>>() {
+                yield event;
+            }
+        }
+    }
+}
+
+impl<Sl, Sp> Cacheable<(Sl, Sp)> for SLR1<Sl::Tag, Sp::Tag>
+where
+    Sl: LexSource,
+    Sl::Tag: Serialize + for<'de> Deserialize<'de>,
+    Sp: ParseSource<Sl::Tag>,
+    Sp::Tag: Serialize + for<'de> Deserialize<'de>,
+{
+    type Cache = LRTable<Sl::Tag, Sp::Tag>;
+
+    fn new((_, source_p): (Sl, Sp)) -> anyhow::Result<Self::Cache> {
+        let table = SLR1Table::try_from(source_p)?;
+        Ok(table)
+    }
+
+    fn restore(table: Self::Cache) -> Self {
+        SLR1 { table }
+    }
+}
+
+pub struct SLR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    _phantom_t: PhantomData<T>,
+    _phantom_r: PhantomData<R>,
+}
+
+impl<T, R> SLR1Table<T, R>
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    fn try_from<Sp>(source_p: Sp) -> anyhow::Result<LRTable<T, R>>
+    where
+        Sp: ParseSource<T, Tag = R>,
+    {
+        // Add a dummy top-level rule and update the RuleSet
+        let mut ruleset = source_p.into_ruleset();
+        let top_dummy = Rule::new(
+            None,
+            RuleElem::new_nonterm("__top_dummy"),
+            vec![RuleElem::new_nonterm(&ruleset.top)],
+        );
+        ruleset.update_top(top_dummy.clone());
+
+        // Build the Follow sets
+        let follow_set = FollowSet::from(&ruleset);
+
+        // Build the LR(0) automaton
+        let dfa = LR0DFA::from(&ruleset);
+
+        // Build the SLR(1) parse table
+        let mut builder = LRTableBuilder::from(&dfa);
+        for node in dfa.nodes {
+            let node = node.read().unwrap();
+
+            // For an item A -> α β ., mark Reduce in the columns of Follow(A)
+            for rule in node.find_all_by(is_slr1_reduce_state) {
+                let lhs = lhs_as_str(&rule.lhs);
+                for term in follow_set.get(lhs).unwrap() {
+                    match term {
+                        RuleElem::Term(term) => {
+                            builder.try_set(node.id, Some(*term), LRAction::Reduce(rule.clone()))?;
+                        }
+                        RuleElem::EOF => {
+                            builder.try_set(node.id, None, LRAction::Reduce(rule.clone()))?;
+                        }
+                        _ => {}
+                    }
+                }
+
+                // For an item S -> Top ., mark Accept in the EOF column
+                if rule == &top_dummy {
+                    builder.set(node.id, None, LRAction::Accept);
+                }
+            }
+        }
+        let table = builder.build();
+
+        Ok(table)
+    }
+}
+
+fn is_slr1_reduce_state<T, R>(item: &&LR0Item<T, R>) -> bool
+where
+    T: TokenTag,
+    R: RuleTag<T>,
+{
+    item.check_next_elem().is_none()
+}
+
+fn lhs_as_str<T: TokenTag>(lhs: &RuleElem<T>) -> &str {
+    if let RuleElem::NonTerm(nt) = lhs {
+        nt.as_str()
+    } else {
+        unreachable!()
+    }
+}
diff --git a/crates/parse_lr_slr1/tests/simple.rs b/crates/parse_lr_slr1/tests/simple.rs
new file mode 100644
index 0000000..29096c4
--- /dev/null
+++ b/crates/parse_lr_slr1/tests/simple.rs
@@ -0,0 +1,100 @@
+use copager_core::{Language, Processor};
+use copager_cfg::token::TokenTag;
+use copager_cfg::rule::{RuleTag, Rule, RuleElem};
+use copager_lex::LexSource;
+use copager_lex_regex::RegexLexer;
+use copager_parse::ParseSource;
+use copager_parse_lr_slr1::SLR1;
+use copager_ir_void::Void;
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+enum TestToken {
+    #[default]
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\*")]
+    Mul,
+    #[token(text = r"/")]
+    Div,
+    #[token(text = r"\(")]
+    BracketL,
+    #[token(text = r"\)")]
+    BracketR,
+    #[token(text = r"[1-9][0-9]*")]
+    Num,
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+enum TestRule {
+    #[default]
+    #[rule("<expr> ::= <expr> Plus <term>")]
+    #[rule("<expr> ::= <expr> Minus <term>")]
+    #[rule("<expr> ::= <term>")]
+    Expr,
+    #[rule("<term> ::= <term> Mul <num>")]
+    #[rule("<term> ::= <term> Div <num>")]
+    #[rule("<term> ::= <num>")]
+    Term,
+    #[rule("<num> ::= BracketL <expr> BracketR")]
+    #[rule("<num> ::= Num")]
+    Num,
+}
+
+type TestLanguage = Language<TestToken, TestRule>;
+type TestLexer = RegexLexer<TestToken>;
+type TestParser = SLR1<TestToken, TestRule>;
+type TestProcessor = Processor<TestLanguage, TestLexer, TestParser>;
+
+#[test]
+fn simple_success() {
+    const OK_INPUTS: [&str; 10] = [
+        "10",
+        "10 + 20",
+        "10 - 20",
+        "10 * 20",
+        "10 / 20",
+        "10 + 20 * 30 - 40",
+        "(10)",
+        "((((10))))",
+        "10 * (20 - 30)",
+        "((10 + 20) * (30 / 40)) - 50",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &OK_INPUTS {
+        println!("input: {}", input);
+        processor.process::<Void>(input).unwrap();
+    }
+}
+
+#[test]
+fn simple_failure() {
+    const ERR_INPUTS: [&str; 7] = [
+        "()",
+        "(10 -",
+        "10 +",
+        "*",
+        "10 20 + 30",
+        "10 + 20 * 30 / 40 (",
+        "(((10))",
+    ];
+
+    let processor = TestProcessor::new()
+        .build_lexer()
+        .unwrap()
+        .build_parser()
+        .unwrap();
+
+    for input in &ERR_INPUTS {
+        assert!(processor.process::<Void>(input).is_err(), "input: {}", input);
+    }
+}
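FollowSet is the companion fixed point: FOLLOW(B) collects the terminals that can appear immediately after B in some derivation. The sketch below is simplified to epsilon-free grammars (which the test grammars in this patch are) and uses "$" for the end-of-input column that the SLR(1) builder above fills via try_set(node.id, None, ...); the names are illustrative, not copager's API:

use std::collections::{BTreeMap, BTreeSet};

type Sym = &'static str;

fn is_nonterm(s: Sym) -> bool {
    s.chars().next().is_some_and(|c| c.is_ascii_uppercase())
}

// FIRST of a single symbol, assuming no epsilon rules (fine for the grammars here).
fn first_of(sym: Sym, rules: &[(Sym, Vec<Sym>)], seen: &mut BTreeSet<Sym>) -> BTreeSet<Sym> {
    if !is_nonterm(sym) {
        return BTreeSet::from([sym]);
    }
    if !seen.insert(sym) {
        return BTreeSet::new(); // already expanding this nonterminal
    }
    rules
        .iter()
        .filter(|r| r.0 == sym)
        .flat_map(|r| first_of(r.1[0], rules, seen))
        .collect()
}

// Fixed-point FOLLOW computation; "$" is end-of-input (the EOF column).
fn follow_sets(start: Sym, rules: &[(Sym, Vec<Sym>)]) -> BTreeMap<Sym, BTreeSet<Sym>> {
    let mut follow: BTreeMap<Sym, BTreeSet<Sym>> = BTreeMap::new();
    follow.entry(start).or_default().insert("$");
    loop {
        let mut changed = false;
        for (lhs, rhs) in rules {
            for (i, &sym) in rhs.iter().enumerate() {
                if !is_nonterm(sym) {
                    continue;
                }
                // B followed by beta: add FIRST(beta); B at the very end: add FOLLOW(A)
                let add = match rhs.get(i + 1) {
                    Some(&next) => first_of(next, rules, &mut BTreeSet::new()),
                    None => follow.get(lhs).cloned().unwrap_or_default(),
                };
                let entry = follow.entry(sym).or_default();
                let before = entry.len();
                entry.extend(add);
                changed |= entry.len() != before;
            }
        }
        if !changed {
            return follow;
        }
    }
}

fn main() {
    let rules: Vec<(Sym, Vec<Sym>)> = vec![
        ("E", vec!["E", "+", "T"]),
        ("E", vec!["T"]),
        ("T", vec!["T", "*", "N"]),
        ("T", vec!["N"]),
        ("N", vec!["(", "E", ")"]),
        ("N", vec!["num"]),
    ];
    // FOLLOW(E) = {"$", ")", "+"}; FOLLOW(T) and FOLLOW(N) also pick up "*"
    for (nt, fs) in follow_sets("E", &rules) {
        println!("FOLLOW({}) = {:?}", nt, fs);
    }
}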
diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml
index 3a9ffc7..10cbc86 100644
--- a/crates/utils/Cargo.toml
+++ b/crates/utils/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_utils"
-version = "0.1.1"
+version = "0.3.0"
 edition = "2021"
 
 [dependencies]
diff --git a/examples/build_oneshot/Cargo.toml b/examples/build_oneshot/Cargo.toml
new file mode 100644
index 0000000..3fbf60a
--- /dev/null
+++ b/examples/build_oneshot/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "example_build_oneshot"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+copager = { path = "../..", features = ["derive", "regexlex", "lr1", "sexp"] }
diff --git a/examples/oneshot/src/main.rs b/examples/build_oneshot/src/main.rs
similarity index 81%
rename from examples/oneshot/src/main.rs
rename to examples/build_oneshot/src/main.rs
index 8971e61..b5a0710 100644
--- a/examples/oneshot/src/main.rs
+++ b/examples/build_oneshot/src/main.rs
@@ -1,10 +1,10 @@
-use std::io::stdin;
+use std::io::{stdin, stdout, Write};
 
 use copager::lex::{LexSource, RegexLexer};
 use copager::parse::{ParseSource, LR1};
 use copager::ir::SExp;
 use copager::prelude::*;
-use copager::{Grammar, Processor};
+use copager::{Language, Processor};
 
 #[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
 enum ExprToken {
@@ -43,12 +43,16 @@ enum ExprRule {
     Num,
 }
 
-type MyGrammar = Grammar<ExprToken, ExprRule>;
+type MyLanguage = Language<ExprToken, ExprRule>;
 type MyLexer = RegexLexer<ExprToken>;
 type MyParser = LR1<ExprToken, ExprRule>;
-type MyProcessor = Processor<MyGrammar, MyLexer, MyParser>;
+type MyProcessor = Processor<MyLanguage, MyLexer, MyParser>;
 
 fn main() -> anyhow::Result<()> {
+    println!("Example <build_oneshot>");
+    print!("Input: ");
+    stdout().flush()?;
+
     let mut input = String::new();
     stdin().read_line(&mut input)?;
 
@@ -56,7 +60,7 @@ fn main() -> anyhow::Result<()> {
         .build_lexer()?
         .build_parser()?
         .process::<SExp<_, _>>(&input)?;
-    println!("Success : {}", sexp);
+    println!("Success: {}", sexp);
 
     Ok(())
 }
diff --git a/examples/prebuild/Cargo.toml b/examples/build_prebuild/Cargo.toml
similarity index 61%
rename from examples/prebuild/Cargo.toml
rename to examples/build_prebuild/Cargo.toml
index c3ffa74..2b9743f 100644
--- a/examples/prebuild/Cargo.toml
+++ b/examples/build_prebuild/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "example_prebuild"
+name = "example_build_prebuild"
 version = "0.1.0"
 edition = "2021"
 
@@ -7,9 +7,9 @@ edition = "2021"
 anyhow = { workspace = true }
 thiserror = { workspace = true }
 copager = { path = "../..", features = ["prebuild", "sexp"] }
-grammar = { package = "example_prebuild_grammar", path = "./grammar" }
+language = { package = "example_prebuild_language", path = "./language" }
 
 [build-dependencies]
 serde = { workspace = true }
 copager = { path = "../..", features = ["prebuild"] }
-grammar = { package = "example_prebuild_grammar", path = "./grammar" }
+language = { package = "example_prebuild_language", path = "./language" }
diff --git a/examples/prebuild/build.rs b/examples/build_prebuild/build.rs
similarity index 81%
rename from examples/prebuild/build.rs
rename to examples/build_prebuild/build.rs
index 69c88be..1d9ffc3 100644
--- a/examples/prebuild/build.rs
+++ b/examples/build_prebuild/build.rs
@@ -1,4 +1,4 @@
-use grammar::MyProcessor;
+use language::MyProcessor;
 
 #[copager::prebuild]
 fn main() -> MyProcessor {
diff --git a/examples/prebuild/grammar/Cargo.toml b/examples/build_prebuild/language/Cargo.toml
similarity index 87%
rename from examples/prebuild/grammar/Cargo.toml
rename to examples/build_prebuild/language/Cargo.toml
index 440c658..7da586a 100644
--- a/examples/prebuild/grammar/Cargo.toml
+++ b/examples/build_prebuild/language/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "example_prebuild_grammar"
+name = "example_prebuild_language"
 version = "0.1.0"
 edition = "2021"
diff --git a/examples/prebuild/grammar/src/lib.rs b/examples/build_prebuild/language/src/lib.rs
similarity index 88%
rename from examples/prebuild/grammar/src/lib.rs
rename to examples/build_prebuild/language/src/lib.rs
index c1de489..617ee3c 100644
--- a/examples/prebuild/grammar/src/lib.rs
+++ b/examples/build_prebuild/language/src/lib.rs
@@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize};
 use copager::lex::{LexSource, RegexLexer};
 use copager::parse::{ParseSource, LR1};
 use copager::prelude::*;
-use copager::{Grammar, Processor};
+use copager::{Language, Processor};
 
 #[derive(
     Debug, Default, Copy, Clone, Hash, PartialEq, Eq,
@@ -48,7 +48,7 @@ pub enum ExprRule {
     Num,
 }
 
-pub type MyGrammar = Grammar<ExprToken, ExprRule>;
+pub type MyLanguage = Language<ExprToken, ExprRule>;
 pub type MyLexer = RegexLexer<ExprToken>;
 pub type MyParser = LR1<ExprToken, ExprRule>;
-pub type MyProcessor = Processor<MyGrammar, MyLexer, MyParser>;
+pub type MyProcessor = Processor<MyLanguage, MyLexer, MyParser>;
diff --git a/examples/prebuild/src/main.rs b/examples/build_prebuild/src/main.rs
similarity index 62%
rename from examples/prebuild/src/main.rs
rename to examples/build_prebuild/src/main.rs
index ccb8ee7..0060e37 100644
--- a/examples/prebuild/src/main.rs
+++ b/examples/build_prebuild/src/main.rs
@@ -1,11 +1,15 @@
-use std::io::stdin;
+use std::io::{stdin, stdout, Write};
 
 use copager::ir::SExp;
-use grammar::MyProcessor;
+use language::MyProcessor;
 
 #[copager::load]
 fn main(processor: MyProcessor) -> anyhow::Result<()> {
+    println!("Example <build_prebuild>");
+    print!("Input: ");
+    stdout().flush()?;
+
     let mut input = String::new();
     stdin().read_line(&mut input)?;
 
@@ -13,7 +17,7 @@ fn main(processor: MyProcessor) -> anyhow::Result<()> {
         .build_lexer()?
         .build_parser_by_cache()
         .process::<SExp<_, _>>(&input)?;
-    println!("Success : {}", sexp);
+    println!("Success: {}", sexp);
 
     Ok(())
 }
diff --git a/examples/lang_arithmetic/Cargo.toml b/examples/lang_arithmetic/Cargo.toml
new file mode 100644
index 0000000..032efc3
--- /dev/null
+++ b/examples/lang_arithmetic/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "example_lang_arithmetic"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+copager = { path = "../..", features = ["derive", "regexlex", "lr1", "sexp"] }
diff --git a/examples/lang_arithmetic/src/lib.rs b/examples/lang_arithmetic/src/lib.rs
new file mode 100644
index 0000000..16cfb6a
--- /dev/null
+++ b/examples/lang_arithmetic/src/lib.rs
@@ -0,0 +1,43 @@
+use copager::lex::LexSource;
+use copager::parse::ParseSource;
+use copager::prelude::*;
+use copager::Language;
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+pub enum ArithmeticToken {
+    #[default]
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\*")]
+    Mul,
+    #[token(text = r"/")]
+    Div,
+    #[token(text = r"\(")]
+    BracketL,
+    #[token(text = r"\)")]
+    BracketR,
+    #[token(text = r"[1-9][0-9]*")]
+    Num,
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+pub enum ArithmeticRule {
+    #[default]
+    #[rule("<expr> ::= <expr> Plus <term>")]
+    #[rule("<expr> ::= <expr> Minus <term>")]
+    #[rule("<expr> ::= <term>")]
+    Expr,
+    #[rule("<term> ::= <term> Mul <num>")]
+    #[rule("<term> ::= <term> Div <num>")]
+    #[rule("<term> ::= <num>")]
+    Term,
+    #[rule("<num> ::= BracketL <expr> BracketR")]
+    #[rule("<num> ::= Num")]
+    Num,
+}
+
+pub type Arithmetic = Language<ArithmeticToken, ArithmeticRule>;
diff --git a/examples/lang_arithmetic/src/main.rs b/examples/lang_arithmetic/src/main.rs
new file mode 100644
index 0000000..50f70cd
--- /dev/null
+++ b/examples/lang_arithmetic/src/main.rs
@@ -0,0 +1,29 @@
+use std::io::{stdin, stdout, Write};
+
+use copager::lex::RegexLexer;
+use copager::parse::LR1;
+use copager::ir::SExp;
+use copager::Processor;
+
+use example_lang_arithmetic::*;
+
+type MyLexer = RegexLexer<ArithmeticToken>;
+type MyParser = LR1<ArithmeticToken, ArithmeticRule>;
+type MyProcessor = Processor<Arithmetic, MyLexer, MyParser>;
+
+fn main() -> anyhow::Result<()> {
+    println!("Example <arithmetic>");
+    print!("Input: ");
+    stdout().flush()?;
+
+    let mut input = String::new();
+    stdin().read_line(&mut input)?;
+
+    let sexp = MyProcessor::new()
+        .build_lexer()?
+        .build_parser()?
+        .process::<SExp<_, _>>(&input)?;
+    println!("Success: {}", sexp);
+
+    Ok(())
+}
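The <expr>/<term>/<num> stratification in ArithmeticRule is what encodes precedence and associativity: Mul/Div sit one level below Plus/Minus, so they bind tighter, and the left recursion makes every operator group to the left. A hand-rolled evaluator that mirrors the same three levels makes this visible (a toy that expects space-separated tokens, unrelated to the LR machinery):

// Each grammar level becomes one function; the left recursion becomes a loop,
// which is exactly what yields left associativity.
fn eval(input: &str) -> Option<i64> {
    let toks: Vec<&str> = input.split_whitespace().collect();
    let mut pos = 0;
    let v = expr(&toks, &mut pos)?;
    (pos == toks.len()).then_some(v)
}

fn expr(t: &[&str], p: &mut usize) -> Option<i64> {
    let mut acc = term(t, p)?; // <expr> ::= <term>
    while let Some(&op) = t.get(*p) {
        match op {
            "+" => { *p += 1; acc += term(t, p)?; } // <expr> ::= <expr> Plus <term>
            "-" => { *p += 1; acc -= term(t, p)?; }
            _ => break,
        }
    }
    Some(acc)
}

fn term(t: &[&str], p: &mut usize) -> Option<i64> {
    let mut acc = num(t, p)?; // <term> ::= <num>
    while let Some(&op) = t.get(*p) {
        match op {
            "*" => { *p += 1; acc *= num(t, p)?; } // <term> ::= <term> Mul <num>
            "/" => { *p += 1; acc /= num(t, p)?; }
            _ => break,
        }
    }
    Some(acc)
}

fn num(t: &[&str], p: &mut usize) -> Option<i64> {
    match t.get(*p)? {
        &"(" => {
            // <num> ::= BracketL <expr> BracketR
            *p += 1;
            let v = expr(t, p)?;
            if t.get(*p) != Some(&")") {
                return None;
            }
            *p += 1;
            Some(v)
        }
        s => {
            let v = s.parse().ok()?;
            *p += 1;
            Some(v)
        }
    }
}

fn main() {
    assert_eq!(eval("10 + 20 * 30 - 40"), Some(570)); // * binds tighter than + and -
    assert_eq!(eval("( 10 + 20 ) * 2"), Some(60));
    assert_eq!(eval("10 +"), None);
}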
diff --git a/examples/lang_json/Cargo.toml b/examples/lang_json/Cargo.toml
new file mode 100644
index 0000000..2bdc8a7
--- /dev/null
+++ b/examples/lang_json/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "example_lang_json"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+copager = { path = "../..", features = ["derive", "regexlex", "lr1", "sexp"] }
diff --git a/examples/lang_json/src/lib.rs b/examples/lang_json/src/lib.rs
new file mode 100644
index 0000000..7dc88c6
--- /dev/null
+++ b/examples/lang_json/src/lib.rs
@@ -0,0 +1,92 @@
+use copager::lex::LexSource;
+use copager::parse::ParseSource;
+use copager::prelude::*;
+use copager::Language;
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+pub enum JsonToken {
+    // Symbols
+    #[token(text = r"\:")]
+    Colon,
+    #[token(text = r"\,")]
+    Comma,
+
+    // Keywords
+    #[token(text = r"true")]
+    True,
+    #[token(text = r"false")]
+    False,
+    #[token(text = r"null")]
+    Null,
+
+    // Identifiers & numbers
+    #[token(text = r#""[a-zA-Z_][a-zA-Z0-9_]*""#)]
+    String,
+    #[token(text = r"\d+")]
+    Number,
+
+    // Braces for objects
+    #[default]
+    #[token(text = r"\{")]
+    CurlyBracketL,
+    #[token(text = r"\}")]
+    CurlyBracketR,
+
+    // Brackets for arrays
+    #[token(text = r"\[")]
+    SquareBracketL,
+    #[token(text = r"\]")]
+    SquareBracketR,
+
+    // Whitespace
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+pub enum JsonRule {
+    // JSON top level
+    #[default]
+    #[rule("<json> ::= <object>")]
+    #[rule("<json> ::= <array>")]
+    Json,
+
+    #[rule("<item> ::= <object>")]
+    #[rule("<item> ::= <array>")]
+    Item,
+
+    // Arrays
+    #[rule("<array> ::= SquareBracketL <value_list> SquareBracketR")]
+    Array,
+
+    #[rule("<value_list> ::= <value_list> Comma <value>")]
+    #[rule("<value_list> ::= <value>")]
+    #[rule("<value_list> ::= ")]
+    ValueList,
+
+    // Objects
+    #[rule("<object> ::= CurlyBracketL <key_value_list> CurlyBracketR")]
+    Object,
+
+    #[rule("<key_value_list> ::= <key_value_list> Comma <key_value>")]
+    #[rule("<key_value_list> ::= <key_value>")]
+    #[rule("<key_value_list> ::= ")]
+    KeyValueList,
+
+    #[rule("<key_value> ::= <key> Colon <value>")]
+    KeyValue,
+
+    #[rule("<key> ::= String")]
+    Key,
+
+    #[rule("<value> ::= <object>")]
+    #[rule("<value> ::= <array>")]
+    #[rule("<value> ::= String")]
+    #[rule("<value> ::= Number")]
+    #[rule("<value> ::= True")]
+    #[rule("<value> ::= False")]
+    #[rule("<value> ::= Null")]
+    Value,
+}
+
+pub type Json = Language<JsonToken, JsonRule>;
diff --git a/examples/lang_json/src/main.rs b/examples/lang_json/src/main.rs
new file mode 100644
index 0000000..10b7906
--- /dev/null
+++ b/examples/lang_json/src/main.rs
@@ -0,0 +1,29 @@
+use std::io::{stdin, stdout, Write};
+
+use copager::lex::RegexLexer;
+use copager::parse::LR1;
+use copager::ir::SExp;
+use copager::Processor;
+
+use example_lang_json::*;
+
+type MyLexer = RegexLexer<JsonToken>;
+type MyParser = LR1<JsonToken, JsonRule>;
+type MyProcessor = Processor<Json, MyLexer, MyParser>;
+
+fn main() -> anyhow::Result<()> {
+    println!("Example <json>");
+    print!("Input: ");
+    stdout().flush()?;
+
+    let mut input = String::new();
+    stdin().read_line(&mut input)?;
+
+    let sexp = MyProcessor::new()
+        .build_lexer()?
+        .build_parser()?
+        .process::<SExp<_, _>>(&input)?;
+    println!("Success: {}", sexp);
+
+    Ok(())
+}
diff --git a/examples/oneshot/Cargo.toml b/examples/lang_pl0/Cargo.toml
similarity index 88%
rename from examples/oneshot/Cargo.toml
rename to examples/lang_pl0/Cargo.toml
index b38a4b8..fec3ee4 100644
--- a/examples/oneshot/Cargo.toml
+++ b/examples/lang_pl0/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "example_oneshot"
+name = "example_lang_pl0"
 version = "0.1.0"
 edition = "2021"
diff --git a/examples/lang_pl0/src/lib.rs b/examples/lang_pl0/src/lib.rs
new file mode 100644
index 0000000..4399ede
--- /dev/null
+++ b/examples/lang_pl0/src/lib.rs
@@ -0,0 +1,9 @@
+mod token;
+mod rule;
+
+use copager::Language;
+
+pub use token::Pl0Token;
+pub use rule::Pl0Rule;
+
+pub type Pl0 = Language<Pl0Token, Pl0Rule>;
diff --git a/examples/lang_pl0/src/main.rs b/examples/lang_pl0/src/main.rs
new file mode 100644
index 0000000..7eb376f
--- /dev/null
+++ b/examples/lang_pl0/src/main.rs
@@ -0,0 +1,29 @@
+use std::io::{stdin, stdout, Write};
+
+use copager::lex::RegexLexer;
+use copager::parse::LR1;
+use copager::ir::SExp;
+use copager::Processor;
+
+use example_lang_pl0::*;
+
+type MyLexer = RegexLexer<Pl0Token>;
+type MyParser = LR1<Pl0Token, Pl0Rule>;
+type MyProcessor = Processor<Pl0, MyLexer, MyParser>;
+
+fn main() -> anyhow::Result<()> {
+    println!("Example <pl0>");
+    print!("Input: ");
+    stdout().flush()?;
+
+    let mut input = String::new();
+    stdin().read_line(&mut input)?;
+
+    let sexp = MyProcessor::new()
+        .build_lexer()?
+        .build_parser()?
+        .process::<SExp<_, _>>(&input)?;
+    println!("Success: {}", sexp);
+
+    Ok(())
+}
diff --git a/examples/lang_pl0/src/rule.rs b/examples/lang_pl0/src/rule.rs
new file mode 100644
index 0000000..b8cae11
--- /dev/null
+++ b/examples/lang_pl0/src/rule.rs
@@ -0,0 +1,110 @@
+use copager::parse::ParseSource;
+use copager::prelude::*;
+
+use crate::token::Pl0Token;
+
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, ParseSource)]
+pub enum Pl0Rule {
+    // Program body
+    #[default]
+    #[rule("<program> ::= <block> Period")]
+    Program,
+
+    // Blocks
+    #[rule("<block> ::= <const_decl> <var_decl> <proc_decl_list> <stmt>")]
+    Block,
+
+    // Constant declarations
+    #[rule("<const_decl> ::= Const <const_def_list> Semicolon")]
+    #[rule("<const_decl> ::= ")]
+    ConstDecl,
+
+    #[rule("<const_def_list> ::= <const_def_list> Comma <const_def>")]
+    #[rule("<const_def_list> ::= <const_def>")]
+    ConstDefList,
+
+    #[rule("<const_def> ::= Ident Eql Number")]
+    ConstDef,
+
+    // Variable declarations
+    #[rule("<var_decl> ::= Var <ident_list> Semicolon")]
+    #[rule("<var_decl> ::= ")]
+    VarDecl,
+
+    #[rule("<ident_list> ::= <ident_list> Comma Ident")]
+    #[rule("<ident_list> ::= Ident")]
+    IdentList,
+
+    // Procedure declarations
+    #[rule("<proc_decl_list> ::= <proc_decl_list> <proc_decl>")]
+    #[rule("<proc_decl_list> ::= <proc_decl>")]
+    #[rule("<proc_decl_list> ::= ")]
+    ProcDeclList,
+
+    #[rule("<proc_decl> ::= Procedure Ident Semicolon <block> Semicolon")]
+    // #[rule("<proc_decl> ::= Procedure Ident Semicolon <block> Semicolon")]
+    ProcDecl,
+
+    // Statements
+    #[rule("<stmt> ::= <assign_stmt>")]
+    #[rule("<stmt> ::= <call_stmt>")]
+    #[rule("<stmt> ::= <begin_stmt>")]
+    #[rule("<stmt> ::= <if_stmt>")]
+    #[rule("<stmt> ::= <while_stmt>")]
+    #[rule("<stmt> ::= <read_stmt>")]
+    #[rule("<stmt> ::= <write_stmt>")]
+    Stmt,
+
+    #[rule("<assign_stmt> ::= Ident Becomes <expr>")]
+    AssignStmt,
+
+    #[rule("<call_stmt> ::= Call Ident")]
+    CallStmt,
+
+    #[rule("<begin_stmt> ::= Begin <stmt_list> Semicolon End")]
+    BeginStmt,
+
+    #[rule("<stmt_list> ::= <stmt_list> Semicolon <stmt>")]
+    #[rule("<stmt_list> ::= <stmt>")]
+    StmtList,
+
+    #[rule("<if_stmt> ::= If <condition> Then <stmt>")]
+    IfStmt,
+
+    #[rule("<while_stmt> ::= While <condition> Do <stmt>")]
+    WhileStmt,
+
+    #[rule("<read_stmt> ::= Read ParenL Ident ParenR")]
+    ReadStmt,
+
+    #[rule("<write_stmt> ::= Write ParenL <expr> ParenR")]
+    WriteStmt,
+
+    // Expressions
+    #[rule("<condition> ::= Odd <expr>")]
+    #[rule("<condition> ::= <expr> <rel_op> <expr>")]
+    Condition,
+
+    #[rule("<rel_op> ::= Eql")]
+    #[rule("<rel_op> ::= Neq")]
+    #[rule("<rel_op> ::= Lss")]
+    #[rule("<rel_op> ::= Leq")]
+    #[rule("<rel_op> ::= Gtr")]
+    #[rule("<rel_op> ::= Geq")]
+    RelOp,
+
+    #[rule("<expr> ::= <expr> Plus <term>")]
+    #[rule("<expr> ::= <expr> Minus <term>")]
+    #[rule("<expr> ::= <term>")]
+    Expr,
+
+    #[rule("<term> ::= <term> Times <factor>")]
+    #[rule("<term> ::= <term> Slash <factor>")]
+    #[rule("<term> ::= <factor>")]
+    Term,
+
+    #[rule("<factor> ::= Ident")]
+    #[rule("<factor> ::= Number")]
+    #[rule("<factor> ::= ParenL <expr> ParenR")]
+    Factor,
+}
diff --git a/examples/lang_pl0/src/token.rs b/examples/lang_pl0/src/token.rs
new file mode 100644
index 0000000..6b4a156
--- /dev/null
+++ b/examples/lang_pl0/src/token.rs
@@ -0,0 +1,78 @@
+use copager::lex::LexSource;
+use copager::prelude::*;
+
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, LexSource)]
+pub enum Pl0Token {
+    // Keywords
+    #[default]
+    #[token(text = r"const")]
+    Const,
+    #[token(text = r"var")]
+    Var,
+    #[token(text = r"procedure")]
+    Procedure,
+    #[token(text = r"call")]
+    Call,
+    #[token(text = r"begin")]
+    Begin,
+    #[token(text = r"end")]
+    End,
+    #[token(text = r"if")]
+    If,
+    #[token(text = r"then")]
+    Then,
+    #[token(text = r"while")]
+    While,
+    #[token(text = r"do")]
+    Do,
+    #[token(text = r"odd")]
+    Odd,
+    #[token(text = r"write")]
+    Write,
+    #[token(text = r"read")]
+    Read,
+
+    // Identifiers and numbers
+    #[token(text = r"[a-zA-Z_][a-zA-Z0-9_]*")]
+    Ident,
+    #[token(text = r"\d+")]
+    Number,
+
+    // Operators and symbols
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\*")]
+    Times,
+    #[token(text = r"/")]
+    Slash,
+    #[token(text = r"=")]
+    Eql,
+    #[token(text = r"#")]
+    Neq,
+    #[token(text = r"<=")]
+    Leq,
+    #[token(text = r"<")]
+    Lss,
+    #[token(text = r">=")]
+    Geq,
+    #[token(text = r">")]
+    Gtr,
+    #[token(text = r"\(")]
+    ParenL,
+    #[token(text = r"\)")]
+    ParenR,
+    #[token(text = r",")]
+    Comma,
+    #[token(text = r"\.")]
+    Period,
+    #[token(text = r";")]
+    Semicolon,
+    #[token(text = r":=")]
+    Becomes,
+
+    // Whitespace
+    #[token(text = r"[ \t\n\r]+", ignored)]
+    _Whitespace,
+}
diff --git a/examples/lang_xml/Cargo.toml b/examples/lang_xml/Cargo.toml
new file mode 100644
index 0000000..132787a
--- /dev/null
+++ b/examples/lang_xml/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "example_lang_xml"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+copager = { path = "../..", features = ["derive", "regexlex", "lr1", "sexp"] }
diff --git a/examples/lang_xml/src/lib.rs b/examples/lang_xml/src/lib.rs
new file mode 100644
index 0000000..871b081
--- /dev/null
+++ b/examples/lang_xml/src/lib.rs
@@ -0,0 +1,70 @@
+use copager::lex::LexSource;
+use copager::parse::ParseSource;
+use copager::prelude::*;
+use copager::Language;
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)]
+pub enum XmlToken {
+    // Symbols
+    #[default]
+    #[token(text = r"<")]
+    TagL,
+    #[token(text = r">")]
+    TagR,
+    #[token(text = r"/")]
+    Slash,
+    #[token(text = r"=")]
+    Equal,
+
+    // Strings & identifiers
+    #[token(text = r"[a-zA-Z_][a-zA-Z0-9_]*")]
+    String,
+    #[token(text = r"'[a-zA-Z_][a-zA-Z0-9_]*'")]
+    QuotedString,
+    #[token(text = r#""[a-zA-Z_][a-zA-Z0-9_]*""#)]
+    WQuotedString,
+
+    // Whitespace
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)]
+pub enum XmlRule {
+    // XML top level
+    #[default]
+    #[rule("<xml> ::= <tag>")]
+    #[rule("<xml> ::= <xml> <tag>")]
+    Xml,
+
+    // Tags
+    #[rule("<tag> ::= <single>")]
+    #[rule("<tag> ::= <begin> <value> <end>")]
+    Tag,
+
+    #[rule("<single> ::= TagL String <attr_list> Slash TagR")]
+    Single,
+
+    #[rule("<begin> ::= TagL String <attr_list> TagR")]
+    Begin,
+
+    #[rule("<end> ::= TagL Slash String TagR")]
+    End,
+
+    // Attributes
+    #[rule("<attr_list> ::= <attr_list> <attr>")]
+    #[rule("<attr_list> ::= <attr>")]
+    #[rule("<attr_list> ::= ")]
+    AttrList,
+
+    #[rule("<attr> ::= String Equal QuotedString")]
+    #[rule("<attr> ::= String Equal WQuotedString")]
+    Attr,
+
+    // Values
+    #[rule("<value> ::= <xml>")]
+    #[rule("<value> ::= String")]
+    Value,
+}
+
+pub type Xml = Language<XmlToken, XmlRule>;
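One caveat the XML grammar illustrates: <begin> and <end> are independent productions, so a context-free parser cannot demand that a closing tag's name equal its opener (that constraint is context-sensitive). A separate well-formedness pass over the parsed tags would enforce it with a stack; a minimal sketch with an illustrative event type, not part of this crate:

// Checks that Open/Close tag names pair up properly; self-closing tags are neutral.
enum TagEvent<'a> {
    Open(&'a str),
    Close(&'a str),
    SelfClose,
}

fn names_match(events: &[TagEvent]) -> bool {
    let mut stack = Vec::new();
    for ev in events {
        match ev {
            TagEvent::Open(name) => stack.push(*name),
            TagEvent::Close(name) => {
                if stack.pop() != Some(*name) {
                    return false; // mismatched or unopened closing tag
                }
            }
            TagEvent::SelfClose => {}
        }
    }
    stack.is_empty() // any leftover opens are unclosed
}

fn main() {
    use TagEvent::*;
    assert!(names_match(&[Open("a"), Open("b"), Close("b"), Close("a")]));
    assert!(!names_match(&[Open("a"), Close("b")]));
    assert!(!names_match(&[Open("a")]));
}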
diff --git a/examples/lang_xml/src/main.rs b/examples/lang_xml/src/main.rs
new file mode 100644
index 0000000..b71b977
--- /dev/null
+++ b/examples/lang_xml/src/main.rs
@@ -0,0 +1,29 @@
+use std::io::{stdin, stdout, Write};
+
+use copager::lex::RegexLexer;
+use copager::parse::LR1;
+use copager::ir::SExp;
+use copager::Processor;
+
+use example_lang_xml::*;
+
+type MyLexer = RegexLexer<XmlToken>;
+type MyParser = LR1<XmlToken, XmlRule>;
+type MyProcessor = Processor<Xml, MyLexer, MyParser>;
+
+fn main() -> anyhow::Result<()> {
+    println!("Example <xml>");
+    print!("Input: ");
+    stdout().flush()?;
+
+    let mut input = String::new();
+    stdin().read_line(&mut input)?;
+
+    let sexp = MyProcessor::new()
+        .build_lexer()?
+        .build_parser()?
+        .process::<SExp<_, _>>(&input)?;
+    println!("Success: {}", sexp);
+
+    Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
index 5b802dc..0fe6f2a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -18,8 +18,14 @@ pub mod lex {
 pub mod parse {
     pub use copager_parse::*;
 
+    #[cfg(feature = "lr0")]
+    pub use copager_parse_lr_lr0::*;
     #[cfg(feature = "lr1")]
-    pub use copager_parse_lr1::*;
+    pub use copager_parse_lr_lr1::*;
+    #[cfg(feature = "slr1")]
+    pub use copager_parse_lr_slr1::*;
+    #[cfg(feature = "lalr1")]
+    pub use copager_parse_lr_lalr1::*;
 }
 
 pub mod ir {
@@ -31,6 +37,12 @@ pub mod ir {
 }
 
 pub mod prelude {
-    pub use copager_cfg::rule::{RuleTag, Rule, RuleElem};
+    pub use copager_cfg::rule::{Rule, RuleElem, RuleTag};
     pub use copager_cfg::token::TokenTag;
 }
+
+#[cfg(feature = "dev")]
+pub mod dev {
+    pub use copager_parse_common::*;
+    pub use copager_parse_lr_common as lr;
+}
diff --git a/tests/arithmetic/fail/testcase_1.txt b/tests/arithmetic/fail/testcase_1.txt
new file mode 100644
index 0000000..6a452c1
--- /dev/null
+++ b/tests/arithmetic/fail/testcase_1.txt
@@ -0,0 +1 @@
+()
diff --git a/tests/arithmetic/fail/testcase_2.txt b/tests/arithmetic/fail/testcase_2.txt
new file mode 100644
index 0000000..bf8d49e
--- /dev/null
+++ b/tests/arithmetic/fail/testcase_2.txt
@@ -0,0 +1 @@
+(10 -
diff --git a/tests/arithmetic/fail/testcase_3.txt b/tests/arithmetic/fail/testcase_3.txt
new file mode 100644
index 0000000..5555c87
--- /dev/null
+++ b/tests/arithmetic/fail/testcase_3.txt
@@ -0,0 +1 @@
+10 +
diff --git a/tests/arithmetic/fail/testcase_4.txt b/tests/arithmetic/fail/testcase_4.txt
new file mode 100644
index 0000000..72e8ffc
--- /dev/null
+++ b/tests/arithmetic/fail/testcase_4.txt
@@ -0,0 +1 @@
+*
diff --git a/tests/arithmetic/fail/testcase_5.txt b/tests/arithmetic/fail/testcase_5.txt
new file mode 100644
index 0000000..76badf8
--- /dev/null
+++ b/tests/arithmetic/fail/testcase_5.txt
@@ -0,0 +1 @@
+10 20 + 30
diff --git a/tests/arithmetic/fail/testcase_6.txt b/tests/arithmetic/fail/testcase_6.txt
new file mode 100644
index 0000000..167c36b
--- /dev/null
+++ b/tests/arithmetic/fail/testcase_6.txt
@@ -0,0 +1 @@
+10 + 20 * 30 / 40 (
diff --git a/tests/arithmetic/fail/testcase_7.txt b/tests/arithmetic/fail/testcase_7.txt
new file mode 100644
index 0000000..162acdf
--- /dev/null
+++ b/tests/arithmetic/fail/testcase_7.txt
@@ -0,0 +1 @@
+(((10))
diff --git a/tests/arithmetic/success/testcase_1.txt b/tests/arithmetic/success/testcase_1.txt
new file mode 100644
index 0000000..f599e28
--- /dev/null
+++ b/tests/arithmetic/success/testcase_1.txt
@@ -0,0 +1 @@
+10
diff --git a/tests/arithmetic/success/testcase_10.txt b/tests/arithmetic/success/testcase_10.txt
new file mode 100644
index 0000000..4e08e69
--- /dev/null
+++ b/tests/arithmetic/success/testcase_10.txt
@@ -0,0 +1 @@
+((10 + 20) * (30 / 40)) - 50
diff --git a/tests/arithmetic/success/testcase_2.txt b/tests/arithmetic/success/testcase_2.txt
new file mode 100644
index 0000000..f0da93f
--- /dev/null
+++ b/tests/arithmetic/success/testcase_2.txt
@@ -0,0 +1 @@
+10 + 20
diff --git a/tests/arithmetic/success/testcase_3.txt b/tests/arithmetic/success/testcase_3.txt
new file mode 100644
index 0000000..fe24c3e
--- /dev/null
+++ b/tests/arithmetic/success/testcase_3.txt
@@ -0,0 +1 @@
+10 - 20
diff --git a/tests/arithmetic/success/testcase_4.txt b/tests/arithmetic/success/testcase_4.txt
new file mode 100644
index 0000000..2f5116a
--- /dev/null
+++ b/tests/arithmetic/success/testcase_4.txt
@@ -0,0 +1 @@
+10 * 20
diff --git a/tests/arithmetic/success/testcase_5.txt b/tests/arithmetic/success/testcase_5.txt
new file mode 100644
index 0000000..88d40be
--- /dev/null
+++ b/tests/arithmetic/success/testcase_5.txt
@@ -0,0 +1 @@
+10 / 20
diff --git a/tests/arithmetic/success/testcase_6.txt b/tests/arithmetic/success/testcase_6.txt
new file mode 100644
index 0000000..3e9d1a6
--- /dev/null
+++ b/tests/arithmetic/success/testcase_6.txt
@@ -0,0 +1 @@
+10 + 20 * 30 - 40
diff --git a/tests/arithmetic/success/testcase_7.txt b/tests/arithmetic/success/testcase_7.txt
new file mode 100644
index 0000000..44e7963
--- /dev/null
+++ b/tests/arithmetic/success/testcase_7.txt
@@ -0,0 +1 @@
+(10)
diff --git a/tests/arithmetic/success/testcase_8.txt b/tests/arithmetic/success/testcase_8.txt
new file mode 100644
index 0000000..7b5a24a
--- /dev/null
+++ b/tests/arithmetic/success/testcase_8.txt
@@ -0,0 +1 @@
+((((10))))
diff --git a/tests/arithmetic/success/testcase_9.txt b/tests/arithmetic/success/testcase_9.txt
new file mode 100644
index 0000000..877fba7
--- /dev/null
+++ b/tests/arithmetic/success/testcase_9.txt
@@ -0,0 +1 @@
+10 * (20 - 30)
diff --git a/tests/arithmetic/test.rs b/tests/arithmetic/test.rs
new file mode 100644
index 0000000..24d146b
--- /dev/null
+++ b/tests/arithmetic/test.rs
@@ -0,0 +1,33 @@
+mod utils;
+
+use copager::lex::RegexLexer;
+use copager::parse::LR1;
+use copager::ir::Void;
+use copager::Processor;
+
+use utils::{Expect, test_dir};
+
+use example_lang_arithmetic::*;
+
+#[test]
+fn success() {
+    test_dir("tests/arithmetic/success", Expect::Ok, &parse);
+}
+
+#[test]
+fn fail() {
+    test_dir("tests/arithmetic/fail", Expect::Err, &parse);
+}
+
+fn parse(input: &str) -> anyhow::Result<()> {
+    type TestLexer = RegexLexer<ArithmeticToken>;
+    type TestParser = LR1<ArithmeticToken, ArithmeticRule>;
+    type TestProcessor = Processor<Arithmetic, TestLexer, TestParser>;
+
+    TestProcessor::new()
+        .build_lexer()?
+        .build_parser()?
+        .process::<Void>(input)?;
+
+    Ok(())
+}
diff --git a/tests/arithmetic/utils.rs b/tests/arithmetic/utils.rs
new file mode 100644
index 0000000..ac683ff
--- /dev/null
+++ b/tests/arithmetic/utils.rs
@@ -0,0 +1,41 @@
+use std::fs;
+use std::panic;
+
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum Expect {
+    Ok,
+    Err,
+}
+
+pub fn test_dir<T>(dir: &str, expect: Expect, test_fn: &T)
+where
+    T: Fn(&str) -> anyhow::Result<()> + panic::RefUnwindSafe,
+{
+    let mut entries = fs::read_dir(dir)
+        .unwrap()
+        .map(|entry| entry.unwrap().path())
+        .filter(|path| path.is_file())
+        .map(|path| {
+            let body = fs::read_to_string(&path).unwrap();
+            (path, body)
+        })
+        .collect::<Vec<_>>();
+    entries.sort();
+
+    for (path, body) in entries {
+        print!("Testing {:?} ... ", path);
+        let result = panic::catch_unwind(|| test_fn(&body).unwrap());
+        match result {
+            Ok(_) if expect == Expect::Err => {
+                println!("Failed (expected Error, but got Ok)");
+                panic!("");
+            }
+            Err(e) if expect == Expect::Ok => {
+                println!("Failed (expected Ok, but got Error)");
+                panic!("{}", e.downcast_ref::<String>().unwrap());
+            }
+            _ => println!("Ok"),
+        }
+    }
+}
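The test_dir helper above applies one expectation to every file in a directory, treating a panic inside the closure as a test failure. A hypothetical inline use with a closure, instead of the named parse functions that the test files in this patch define:

mod utils;

use utils::{Expect, test_dir};

#[test]
fn everything_in_dir_is_rejected() {
    // Toy checker standing in for a real parse function; it always errors,
    // so it only makes sense with Expect::Err.
    test_dir("tests/arithmetic/fail", Expect::Err, &|input: &str| {
        anyhow::bail!("rejected: {input}")
    });
}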
", path); + let result = panic::catch_unwind(|| test_fn(&body).unwrap()); + match result { + Ok(_) if expect == Expect::Err => { + println!("Failed (expected Error, but got Ok)"); + panic!(""); + + } + Err(e) if expect == Expect::Ok => { + println!("expected Ok, but got Error."); + panic!("{}", e.downcast_ref::().unwrap()); + } + _ => println!("Ok"), + } + } +} diff --git a/tests/json/fail/testcase_01.txt b/tests/json/fail/testcase_01.txt new file mode 100644 index 0000000..98232c6 --- /dev/null +++ b/tests/json/fail/testcase_01.txt @@ -0,0 +1 @@ +{ diff --git a/tests/json/fail/testcase_02.txt b/tests/json/fail/testcase_02.txt new file mode 100644 index 0000000..e072a79 --- /dev/null +++ b/tests/json/fail/testcase_02.txt @@ -0,0 +1 @@ +{"key: "value"} diff --git a/tests/json/fail/testcase_03.txt b/tests/json/fail/testcase_03.txt new file mode 100644 index 0000000..12c5185 --- /dev/null +++ b/tests/json/fail/testcase_03.txt @@ -0,0 +1 @@ +{"key": True} diff --git a/tests/json/fail/testcase_04.txt b/tests/json/fail/testcase_04.txt new file mode 100644 index 0000000..1424259 --- /dev/null +++ b/tests/json/fail/testcase_04.txt @@ -0,0 +1,4 @@ +{ + "key1": "value1", + "key2": "value2", +} diff --git a/tests/json/success/testcase_01.txt b/tests/json/success/testcase_01.txt new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/tests/json/success/testcase_01.txt @@ -0,0 +1 @@ +{} diff --git a/tests/json/success/testcase_02.txt b/tests/json/success/testcase_02.txt new file mode 100644 index 0000000..76519fa --- /dev/null +++ b/tests/json/success/testcase_02.txt @@ -0,0 +1 @@ +{"key": "value"} diff --git a/tests/json/success/testcase_03.txt b/tests/json/success/testcase_03.txt new file mode 100644 index 0000000..e78fb99 --- /dev/null +++ b/tests/json/success/testcase_03.txt @@ -0,0 +1 @@ +{"key1": "value1", "key2": "value2"} diff --git a/tests/json/success/testcase_04.txt b/tests/json/success/testcase_04.txt new file mode 100644 index 0000000..2c3dcde --- /dev/null +++ b/tests/json/success/testcase_04.txt @@ -0,0 +1 @@ +{"key": 10} diff --git a/tests/json/success/testcase_05.txt b/tests/json/success/testcase_05.txt new file mode 100644 index 0000000..95ff2f8 --- /dev/null +++ b/tests/json/success/testcase_05.txt @@ -0,0 +1 @@ +{"key": true} diff --git a/tests/json/success/testcase_06.txt b/tests/json/success/testcase_06.txt new file mode 100644 index 0000000..60424ef --- /dev/null +++ b/tests/json/success/testcase_06.txt @@ -0,0 +1 @@ +{"key": []} diff --git a/tests/json/success/testcase_07.txt b/tests/json/success/testcase_07.txt new file mode 100644 index 0000000..ae4e490 --- /dev/null +++ b/tests/json/success/testcase_07.txt @@ -0,0 +1 @@ +{"key": [10, 20]} diff --git a/tests/json/success/testcase_08.txt b/tests/json/success/testcase_08.txt new file mode 100644 index 0000000..fc52e6d --- /dev/null +++ b/tests/json/success/testcase_08.txt @@ -0,0 +1,6 @@ +{ + "key1": { + "key2": 10, + "key3": 20 + } +} diff --git a/tests/json/success/testcase_09.txt b/tests/json/success/testcase_09.txt new file mode 100644 index 0000000..28d3056 --- /dev/null +++ b/tests/json/success/testcase_09.txt @@ -0,0 +1,10 @@ +{ + "key1": { + "key2": 10, + "key3": 20 + }, + "key4": "value", + "key5": { + "key6": [10, 20, 30] + } +} diff --git a/tests/json/success/testcase_10.txt b/tests/json/success/testcase_10.txt new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/tests/json/success/testcase_10.txt @@ -0,0 +1 @@ +[] diff --git a/tests/json/success/testcase_11.txt b/tests/json/success/testcase_11.txt 
new file mode 100644 index 0000000..2a421cc --- /dev/null +++ b/tests/json/success/testcase_11.txt @@ -0,0 +1 @@ +[10] diff --git a/tests/json/success/testcase_12.txt b/tests/json/success/testcase_12.txt new file mode 100644 index 0000000..b6ad0ff --- /dev/null +++ b/tests/json/success/testcase_12.txt @@ -0,0 +1 @@ +[10, 20, 30] diff --git a/tests/json/success/testcase_13.txt b/tests/json/success/testcase_13.txt new file mode 100644 index 0000000..c762def --- /dev/null +++ b/tests/json/success/testcase_13.txt @@ -0,0 +1,8 @@ +[ + 10, + "value", + true, + false, + null, + {"key": "value"} +] diff --git a/tests/json/test.rs b/tests/json/test.rs new file mode 100644 index 0000000..77ee1bd --- /dev/null +++ b/tests/json/test.rs @@ -0,0 +1,33 @@ +mod utils; + +use copager::lex::RegexLexer; +use copager::parse::LR1; +use copager::ir::Void; +use copager::Processor; + +use utils::{Expect, test_dir}; + +use example_lang_json::*; + +#[test] +fn success() { + test_dir("tests/json/success", Expect::Ok, &parse); +} + +#[test] +fn fail() { + test_dir("tests/json/fail", Expect::Err, &parse); +} + +fn parse(input: &str) -> anyhow::Result<()> { + type TestLexer = RegexLexer; + type TestParser = LR1; + type TestProcessor = Processor; + + TestProcessor::new() + .build_lexer()? + .build_parser()? + .process::(input)?; + + Ok(()) +} diff --git a/tests/json/utils.rs b/tests/json/utils.rs new file mode 100644 index 0000000..ac683ff --- /dev/null +++ b/tests/json/utils.rs @@ -0,0 +1,41 @@ +use std::fs; +use std::panic; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Expect { + Ok, + Err, +} + +pub fn test_dir(dir: &str, expect: Expect, test_fn: &T) +where + T: Fn(&str) -> anyhow::Result<()> + panic::RefUnwindSafe, +{ + let mut entries = fs::read_dir(dir) + .unwrap() + .map(|entry| entry.unwrap().path()) + .filter(|path| { path.is_file() }) + .map(|path| { + let body = fs::read_to_string(&path).unwrap(); + (path, body) + }) + .collect::>(); + entries.sort(); + + for (path, body) in entries { + print!("Testing {:?} ... ", path); + let result = panic::catch_unwind(|| test_fn(&body).unwrap()); + match result { + Ok(_) if expect == Expect::Err => { + println!("Failed (expected Error, but got Ok)"); + panic!(""); + + } + Err(e) if expect == Expect::Ok => { + println!("expected Ok, but got Error."); + panic!("{}", e.downcast_ref::().unwrap()); + } + _ => println!("Ok"), + } + } +} diff --git a/tests/pl0/fail/testcase_1.txt b/tests/pl0/fail/testcase_1.txt new file mode 100644 index 0000000..e69de29 diff --git a/tests/pl0/success/testcase_1.txt b/tests/pl0/success/testcase_1.txt new file mode 100644 index 0000000..4ee4453 --- /dev/null +++ b/tests/pl0/success/testcase_1.txt @@ -0,0 +1,17 @@ +const max = 10; + +var x, y; + +procedure find_max; +begin + if x > y then + write(x); + if x <= y then + write(y); +end; + +begin + read(x); + read(y); + call find_max; +end. 
diff --git a/tests/pl0/test.rs b/tests/pl0/test.rs new file mode 100644 index 0000000..b5a1daa --- /dev/null +++ b/tests/pl0/test.rs @@ -0,0 +1,33 @@ +mod utils; + +use copager::lex::RegexLexer; +use copager::parse::LR1; +use copager::ir::Void; +use copager::Processor; + +use utils::{Expect, test_dir}; + +use example_lang_pl0::*; + +#[test] +fn success() { + test_dir("tests/pl0/success", Expect::Ok, &parse); +} + +#[test] +fn fail() { + test_dir("tests/pl0/fail", Expect::Err, &parse); +} + +fn parse(input: &str) -> anyhow::Result<()> { + type TestLexer = RegexLexer; + type TestParser = LR1; + type TestProcessor = Processor; + + TestProcessor::new() + .build_lexer()? + .build_parser()? + .process::(input)?; + + Ok(()) +} diff --git a/tests/pl0/utils.rs b/tests/pl0/utils.rs new file mode 100644 index 0000000..ac683ff --- /dev/null +++ b/tests/pl0/utils.rs @@ -0,0 +1,41 @@ +use std::fs; +use std::panic; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Expect { + Ok, + Err, +} + +pub fn test_dir(dir: &str, expect: Expect, test_fn: &T) +where + T: Fn(&str) -> anyhow::Result<()> + panic::RefUnwindSafe, +{ + let mut entries = fs::read_dir(dir) + .unwrap() + .map(|entry| entry.unwrap().path()) + .filter(|path| { path.is_file() }) + .map(|path| { + let body = fs::read_to_string(&path).unwrap(); + (path, body) + }) + .collect::>(); + entries.sort(); + + for (path, body) in entries { + print!("Testing {:?} ... ", path); + let result = panic::catch_unwind(|| test_fn(&body).unwrap()); + match result { + Ok(_) if expect == Expect::Err => { + println!("Failed (expected Error, but got Ok)"); + panic!(""); + + } + Err(e) if expect == Expect::Ok => { + println!("expected Ok, but got Error."); + panic!("{}", e.downcast_ref::().unwrap()); + } + _ => println!("Ok"), + } + } +} diff --git a/tests/xml/fail/testcase_01.txt b/tests/xml/fail/testcase_01.txt new file mode 100644 index 0000000..1b232f5 --- /dev/null +++ b/tests/xml/fail/testcase_01.txt @@ -0,0 +1 @@ + diff --git a/tests/xml/fail/testcase_02.txt b/tests/xml/fail/testcase_02.txt new file mode 100644 index 0000000..0142cf3 --- /dev/null +++ b/tests/xml/fail/testcase_02.txt @@ -0,0 +1 @@ +value diff --git a/tests/xml/fail/testcase_03.txt b/tests/xml/fail/testcase_03.txt new file mode 100644 index 0000000..050a905 --- /dev/null +++ b/tests/xml/fail/testcase_03.txt @@ -0,0 +1,2 @@ +value + diff --git a/tests/xml/fail/testcase_04.txt b/tests/xml/fail/testcase_04.txt new file mode 100644 index 0000000..6ab5ce3 --- /dev/null +++ b/tests/xml/fail/testcase_04.txt @@ -0,0 +1,2 @@ +value + diff --git a/tests/xml/fail/testcase_05.txt b/tests/xml/fail/testcase_05.txt new file mode 100644 index 0000000..7b8fd44 --- /dev/null +++ b/tests/xml/fail/testcase_05.txt @@ -0,0 +1 @@ +value diff --git a/tests/xml/success/testcase_01.txt b/tests/xml/success/testcase_01.txt new file mode 100644 index 0000000..6e34491 --- /dev/null +++ b/tests/xml/success/testcase_01.txt @@ -0,0 +1 @@ +value diff --git a/tests/xml/success/testcase_02.txt b/tests/xml/success/testcase_02.txt new file mode 100644 index 0000000..f7ad3e3 --- /dev/null +++ b/tests/xml/success/testcase_02.txt @@ -0,0 +1 @@ +value diff --git a/tests/xml/success/testcase_03.txt b/tests/xml/success/testcase_03.txt new file mode 100644 index 0000000..6e34491 --- /dev/null +++ b/tests/xml/success/testcase_03.txt @@ -0,0 +1 @@ +value diff --git a/tests/xml/success/testcase_04.txt b/tests/xml/success/testcase_04.txt new file mode 100644 index 0000000..81858b6 --- /dev/null +++ b/tests/xml/success/testcase_04.txt @@ 
-0,0 +1 @@ + diff --git a/tests/xml/success/testcase_05.txt b/tests/xml/success/testcase_05.txt new file mode 100644 index 0000000..8474b01 --- /dev/null +++ b/tests/xml/success/testcase_05.txt @@ -0,0 +1 @@ +value diff --git a/tests/xml/success/testcase_06.txt b/tests/xml/success/testcase_06.txt new file mode 100644 index 0000000..6dea9cd --- /dev/null +++ b/tests/xml/success/testcase_06.txt @@ -0,0 +1 @@ +value diff --git a/tests/xml/success/testcase_07.txt b/tests/xml/success/testcase_07.txt new file mode 100644 index 0000000..a7cc1c9 --- /dev/null +++ b/tests/xml/success/testcase_07.txt @@ -0,0 +1,3 @@ +value +value +value diff --git a/tests/xml/success/testcase_08.txt b/tests/xml/success/testcase_08.txt new file mode 100644 index 0000000..1e74546 --- /dev/null +++ b/tests/xml/success/testcase_08.txt @@ -0,0 +1,3 @@ + + value + diff --git a/tests/xml/success/testcase_09.txt b/tests/xml/success/testcase_09.txt new file mode 100644 index 0000000..71d23fd --- /dev/null +++ b/tests/xml/success/testcase_09.txt @@ -0,0 +1,4 @@ + + value + value + diff --git a/tests/xml/test.rs b/tests/xml/test.rs new file mode 100644 index 0000000..cc9d879 --- /dev/null +++ b/tests/xml/test.rs @@ -0,0 +1,33 @@ +mod utils; + +use copager::lex::RegexLexer; +use copager::parse::LR1; +use copager::ir::Void; +use copager::Processor; + +use utils::{Expect, test_dir}; + +use example_lang_xml::*; + +#[test] +fn success() { + test_dir("tests/xml/success", Expect::Ok, &parse); +} + +#[test] +fn fail() { + test_dir("tests/xml/fail", Expect::Err, &parse); +} + +fn parse(input: &str) -> anyhow::Result<()> { + type TestLexer = RegexLexer; + type TestParser = LR1; + type TestProcessor = Processor; + + TestProcessor::new() + .build_lexer()? + .build_parser()? + .process::(input)?; + + Ok(()) +} diff --git a/tests/xml/utils.rs b/tests/xml/utils.rs new file mode 100644 index 0000000..ac683ff --- /dev/null +++ b/tests/xml/utils.rs @@ -0,0 +1,41 @@ +use std::fs; +use std::panic; + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Expect { + Ok, + Err, +} + +pub fn test_dir(dir: &str, expect: Expect, test_fn: &T) +where + T: Fn(&str) -> anyhow::Result<()> + panic::RefUnwindSafe, +{ + let mut entries = fs::read_dir(dir) + .unwrap() + .map(|entry| entry.unwrap().path()) + .filter(|path| { path.is_file() }) + .map(|path| { + let body = fs::read_to_string(&path).unwrap(); + (path, body) + }) + .collect::>(); + entries.sort(); + + for (path, body) in entries { + print!("Testing {:?} ... ", path); + let result = panic::catch_unwind(|| test_fn(&body).unwrap()); + match result { + Ok(_) if expect == Expect::Err => { + println!("Failed (expected Error, but got Ok)"); + panic!(""); + + } + Err(e) if expect == Expect::Ok => { + println!("expected Ok, but got Error."); + panic!("{}", e.downcast_ref::().unwrap()); + } + _ => println!("Ok"), + } + } +}