From 5a87442f5ace5cae759b218882cb97e07f960519 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Mon, 26 Aug 2024 17:25:11 +0900 Subject: [PATCH 01/55] [change] parsergen -> copager --- Cargo.lock | 49 ++++++++++++++--------------- Cargo.toml | 15 ++++----- crates/algorithm/Cargo.toml | 4 +-- crates/algorithm_lr1/Cargo.toml | 6 ++-- crates/algorithm_lr1/src/builder.rs | 2 +- crates/algorithm_lr1/src/driver.rs | 6 ++-- crates/algorithm_lr1/src/error.rs | 6 ++-- crates/algorithm_lr1/src/lib.rs | 10 +++--- crates/core/Cargo.toml | 6 ++-- crates/core/src/cfg.rs | 2 +- crates/core_derive/Cargo.toml | 2 +- examples/expr.rs | 8 ++--- src/lib.rs | 4 +-- tests/derive.rs | 2 +- tests/serde.rs | 16 +++++----- 15 files changed, 68 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c5d23a..8ca936d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12,42 +12,53 @@ dependencies = [ ] [[package]] -name = "algorithm" +name = "anyhow" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" + +[[package]] +name = "copager" version = "0.1.1" dependencies = [ - "algorithm_lr1", + "anyhow", + "copager_algorithm", + "copager_core", + "serde", + "serde_json", ] [[package]] -name = "algorithm_lr1" +name = "copager_algorithm" +version = "0.1.1" +dependencies = [ + "copager_algorithm_lr1", +] + +[[package]] +name = "copager_algorithm_lr1" version = "0.1.1" dependencies = [ "anyhow", - "core", + "copager_core", "itertools", "serde", "thiserror", ] [[package]] -name = "anyhow" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" - -[[package]] -name = "core" +name = "copager_core" version = "0.1.1" dependencies = [ "anyhow", - "core_derive", + "copager_core_derive", "regex", "serde", "thiserror", ] [[package]] -name = "core_derive" +name = "copager_core_derive" version = "0.1.1" dependencies = [ "anyhow", @@ -84,18 +95,6 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" -[[package]] -name = "parsergen" -version = "0.1.1" -dependencies = [ - "algorithm", - "anyhow", - "core", - "serde", - "serde_json", - "thiserror", -] - [[package]] name = "proc-macro2" version = "1.0.82" diff --git a/Cargo.toml b/Cargo.toml index aac0baa..a103469 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,21 +1,20 @@ [package] -name = "parsergen" +name = "copager" version = "0.1.1" edition = "2021" [dependencies] -anyhow = { workspace = true } -thiserror = { workspace = true } -pgen_core = { workspace = true } -pgen_algorithm = { workspace = true } +copager_core = { path = "./crates/core" } +copager_algorithm = { path = "./crates/algorithm" } [dev-dependencies] +anyhow = { workspace = true } serde = { workspace = true } serde_json = "1.0.117" [features] default = [] -derive = ["pgen_core/derive"] +derive = ["copager_core/derive"] [workspace] resolver = "2" @@ -28,8 +27,6 @@ exclude = [] [workspace.dependencies] anyhow = "1.0.82" thiserror = "1.0.58" -serde = "1.0.197" +serde = { version = "1.0.197", features = ["derive"] } regex = "1.10.4" regex-macro = "0.2.0" -pgen_core = { package = "core", path = "./crates/core" } -pgen_algorithm = { package = "algorithm", path = "./crates/algorithm" } diff --git a/crates/algorithm/Cargo.toml b/crates/algorithm/Cargo.toml index 0db6c8a..c0f01bd 100644 --- 
a/crates/algorithm/Cargo.toml +++ b/crates/algorithm/Cargo.toml @@ -1,7 +1,7 @@ [package] -name = "algorithm" +name = "copager_algorithm" version = "0.1.1" edition = "2021" [dependencies] -lr1 = { package = "algorithm_lr1", path = "../algorithm_lr1" } +lr1 = { package = "copager_algorithm_lr1", path = "../algorithm_lr1" } diff --git a/crates/algorithm_lr1/Cargo.toml b/crates/algorithm_lr1/Cargo.toml index 0380016..a9a264c 100644 --- a/crates/algorithm_lr1/Cargo.toml +++ b/crates/algorithm_lr1/Cargo.toml @@ -1,11 +1,11 @@ [package] -name = "algorithm_lr1" +name = "copager_algorithm_lr1" version = "0.1.1" edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } -serde = { workspace = true, features = ["derive"] } +serde = { workspace = true } itertools = "0.12.1" -pgen_core = { package = "core", path = "../core", features = ["derive"] } +copager_core = { path = "../core", features = ["derive"] } diff --git a/crates/algorithm_lr1/src/builder.rs b/crates/algorithm_lr1/src/builder.rs index 488b96f..a790b72 100644 --- a/crates/algorithm_lr1/src/builder.rs +++ b/crates/algorithm_lr1/src/builder.rs @@ -5,7 +5,7 @@ use std::marker::PhantomData; use serde::{Serialize, Deserialize}; use itertools::Itertools; -use pgen_core::cfg::{TokenSet, Syntax, Rule, RuleElem, RuleSet}; +use copager_core::cfg::{TokenSet, Syntax, Rule, RuleElem, RuleSet}; #[derive(Debug, Serialize, Deserialize)] pub(super) enum LRAction { diff --git a/crates/algorithm_lr1/src/driver.rs b/crates/algorithm_lr1/src/driver.rs index c484233..9c7049a 100644 --- a/crates/algorithm_lr1/src/driver.rs +++ b/crates/algorithm_lr1/src/driver.rs @@ -1,6 +1,6 @@ -use pgen_core::cfg::{TokenSet, Syntax}; -use pgen_core::lex::Token; -use pgen_core::parse::{SExp, SExpBuilder}; +use copager_core::cfg::{TokenSet, Syntax}; +use copager_core::lex::Token; +use copager_core::parse::{SExp, SExpBuilder}; use crate::error::ParseError; use crate::builder::{LRAction, LR1Configure}; diff --git a/crates/algorithm_lr1/src/error.rs b/crates/algorithm_lr1/src/error.rs index 025eb7e..f0faa25 100644 --- a/crates/algorithm_lr1/src/error.rs +++ b/crates/algorithm_lr1/src/error.rs @@ -1,8 +1,8 @@ use thiserror::Error; -use pgen_core::error::ParseError as SuperParseError; -use pgen_core::cfg::TokenSet; -use pgen_core::lex::Token; +use copager_core::error::ParseError as SuperParseError; +use copager_core::cfg::TokenSet; +use copager_core::lex::Token; #[derive(Debug, Error)] pub enum ParseError { diff --git a/crates/algorithm_lr1/src/lib.rs b/crates/algorithm_lr1/src/lib.rs index d8ff6c1..ea43c1b 100644 --- a/crates/algorithm_lr1/src/lib.rs +++ b/crates/algorithm_lr1/src/lib.rs @@ -4,9 +4,9 @@ mod driver; use serde::{Serialize, Deserialize}; -use pgen_core::cfg::{TokenSet, Syntax}; -use pgen_core::lex::Token; -use pgen_core::parse::{ParserImpl, SExp}; +use copager_core::cfg::{TokenSet, Syntax}; +use copager_core::lex::Token; +use copager_core::parse::{ParserImpl, SExp}; use builder::LR1Configure; use driver::LR1Driver; @@ -39,8 +39,8 @@ where #[cfg(test)] mod test { - use pgen_core::cfg::{TokenSet, Syntax, Rule, RuleElem}; - use pgen_core::Parser; + use copager_core::cfg::{TokenSet, Syntax, Rule, RuleElem}; + use copager_core::Parser; use super::LR1; diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index ce5d477..6e3804a 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "core" +name = "copager_core" version = "0.1.1" edition = "2021" @@ -8,8 +8,8 @@ anyhow = { workspace = true 
}
 thiserror = { workspace = true }
 serde = { workspace = true, features = ["derive"]}
 regex = { workspace = true }
-pgen_core_derive = { package = "core_derive", path = "../core_derive", optional = true }
+copager_core_derive = { path = "../core_derive", optional = true }

 [features]
 default = []
-derive = ["dep:pgen_core_derive"]
+derive = ["dep:copager_core_derive"]
diff --git a/crates/core/src/cfg.rs b/crates/core/src/cfg.rs
index c967c91..e3b7f8d 100644
--- a/crates/core/src/cfg.rs
+++ b/crates/core/src/cfg.rs
@@ -2,7 +2,7 @@ mod token;
 mod syntax;

 #[cfg(feature = "derive")]
-pub use pgen_core_derive::{TokenSet, Syntax};
+pub use copager_core_derive::{TokenSet, Syntax};

 pub use token::TokenSet;
 pub use syntax::{Syntax, Rule, RuleElem, RuleSet};
diff --git a/crates/core_derive/Cargo.toml b/crates/core_derive/Cargo.toml
index 8516b8c..8e362b2 100644
--- a/crates/core_derive/Cargo.toml
+++ b/crates/core_derive/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "core_derive"
+name = "copager_core_derive"
 version = "0.1.1"
 edition = "2021"
diff --git a/examples/expr.rs b/examples/expr.rs
index 6f7b7a7..a00812e 100644
--- a/examples/expr.rs
+++ b/examples/expr.rs
@@ -1,9 +1,9 @@
 use std::io::stdin;

-use parsergen::algorithm::LR1;
-use parsergen::cfg::*;
-use parsergen::error::ParseError;
-use parsergen::Parser;
+use copager::algorithm::LR1;
+use copager::cfg::*;
+use copager::error::ParseError;
+use copager::Parser;

 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)]
 enum ExprTokenSet {
diff --git a/src/lib.rs b/src/lib.rs
index f7f0c81..ca8197e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,2 +1,2 @@
-pub use pgen_core::*;
-pub use pgen_algorithm as algorithm;
+pub use copager_core::*;
+pub use copager_algorithm as algorithm;
diff --git a/tests/derive.rs b/tests/derive.rs
index 87d107f..46dcfab 100644
--- a/tests/derive.rs
+++ b/tests/derive.rs
@@ -1,4 +1,4 @@
-use parsergen::cfg::*;
+use copager::cfg::*;

 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)]
 enum TestTokenSet {
diff --git a/tests/serde.rs b/tests/serde.rs
index ce2a274..da7ab5c 100644
--- a/tests/serde.rs
+++ b/tests/serde.rs
@@ -1,8 +1,8 @@
 use serde::{Serialize, Deserialize};

-use parsergen::algorithm::LR1;
-use parsergen::cfg::*;
-use parsergen::Parser;
+use copager::algorithm::LR1;
+use copager::cfg::*;
+use copager::Parser;

 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, TokenSet)]
 enum TestTokenSet {
@@ -39,13 +39,15 @@ enum TestSyntax {
     Num,
 }

-#[test]
-fn serde() {
-    type TestParser<'a> = Parser::<'a, LR1<'a, TestTokenSet, TestSyntax>>;
+type TestParser<'a> = Parser::<'a, LR1<'a, TestTokenSet, TestSyntax>>;

+#[test]
+fn check_serde() {
+    // build.rs
     let parser = TestParser::new().unwrap();
     let serialized = serde_json::to_string(&parser).unwrap();

-    let deserialized: TestParser = serde_json::from_str(&serialized).unwrap();
+    // main.rs
+    let deserialized: TestParser = serde_json::from_str(&serialized).unwrap();
     deserialized.parse("10 * (20 - 30)").unwrap();
 }

From 9f58593356def24d60706dc5c4d165a207fe09a7 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Tue, 27 Aug 2024 00:44:06 +0900
Subject: [PATCH 02/55] [wip] Major crate-structure overhaul, partway done
 (everything except the algorithm* crates)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock | 85 +++++++-
 Cargo.toml | 15 +-
 crates/algorithm_lr1/Cargo.toml 
| 2 +- crates/cfg/Cargo.toml | 8 + crates/cfg/src/lib.rs | 38 ++++ .../src/cfg/syntax.rs => cfg/src/rule.rs} | 159 ++++++--------- crates/cfg/src/token.rs | 27 +++ crates/{core_derive => cfg_derive}/Cargo.toml | 2 +- crates/cfg_derive/src/impl.rs | 2 + .../syntax.rs => cfg_derive/src/impl/rule.rs} | 22 +-- .../src/impl/token.rs} | 22 +-- crates/{core_derive => cfg_derive}/src/lib.rs | 8 +- crates/core/Cargo.toml | 10 +- crates/core/src/cfg.rs | 8 - crates/core/src/cfg/token.rs | 25 --- crates/core/src/error.rs | 6 +- crates/core/src/lex.rs | 184 ------------------ crates/core/src/lib.rs | 6 +- crates/core_derive/src/impl.rs | 2 - crates/ir/Cargo.toml | 9 + crates/ir/src/lib.rs | 18 ++ crates/ir_sexp/Cargo.toml | 10 + .../{core/src/parse.rs => ir_sexp/src/lib.rs} | 81 ++++---- crates/lex/Cargo.toml | 9 + crates/lex/src/lib.rs | 36 ++++ crates/lex_regex/Cargo.toml | 12 ++ crates/lex_regex/src/lib.rs | 77 ++++++++ src/lib.rs | 2 + 28 files changed, 480 insertions(+), 405 deletions(-) create mode 100644 crates/cfg/Cargo.toml create mode 100644 crates/cfg/src/lib.rs rename crates/{core/src/cfg/syntax.rs => cfg/src/rule.rs} (74%) create mode 100644 crates/cfg/src/token.rs rename crates/{core_derive => cfg_derive}/Cargo.toml (89%) create mode 100644 crates/cfg_derive/src/impl.rs rename crates/{core_derive/src/impl/syntax.rs => cfg_derive/src/impl/rule.rs} (91%) rename crates/{core_derive/src/impl/tokenset.rs => cfg_derive/src/impl/token.rs} (91%) rename crates/{core_derive => cfg_derive}/src/lib.rs (62%) delete mode 100644 crates/core/src/cfg.rs delete mode 100644 crates/core/src/cfg/token.rs delete mode 100644 crates/core/src/lex.rs delete mode 100644 crates/core_derive/src/impl.rs create mode 100644 crates/ir/Cargo.toml create mode 100644 crates/ir/src/lib.rs create mode 100644 crates/ir_sexp/Cargo.toml rename crates/{core/src/parse.rs => ir_sexp/src/lib.rs} (51%) create mode 100644 crates/lex/Cargo.toml create mode 100644 crates/lex/src/lib.rs create mode 100644 crates/lex_regex/Cargo.toml create mode 100644 crates/lex_regex/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 8ca936d..92e1c8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,7 +23,14 @@ version = "0.1.1" dependencies = [ "anyhow", "copager_algorithm", + "copager_algorithm_lr1", + "copager_cfg", + "copager_cfg_derive", "copager_core", + "copager_ir", + "copager_ir_sexp", + "copager_lex", + "copager_lex_regex", "serde", "serde_json", ] @@ -47,18 +54,15 @@ dependencies = [ ] [[package]] -name = "copager_core" +name = "copager_cfg" version = "0.1.1" dependencies = [ "anyhow", - "copager_core_derive", - "regex", - "serde", "thiserror", ] [[package]] -name = "copager_core_derive" +name = "copager_cfg_derive" version = "0.1.1" dependencies = [ "anyhow", @@ -68,11 +72,62 @@ dependencies = [ "thiserror", ] +[[package]] +name = "copager_core" +version = "0.1.1" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "serde", + "thiserror", +] + +[[package]] +name = "copager_ir" +version = "0.1.1" +dependencies = [ + "anyhow", + "copager_cfg", + "thiserror", +] + +[[package]] +name = "copager_ir_sexp" +version = "0.1.1" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_ir", + "thiserror", +] + +[[package]] +name = "copager_lex" +version = "0.1.1" +dependencies = [ + "anyhow", + "copager_cfg", + "thiserror", +] + +[[package]] +name = "copager_lex_regex" +version = "0.1.1" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "regex", + "regex-macro", + "thiserror", +] + [[package]] name = "either" -version = 
"1.11.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "itertools" @@ -95,6 +150,12 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + [[package]] name = "proc-macro2" version = "1.0.82" @@ -136,6 +197,16 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-macro" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12fa36e7add16db296640bba993a65dae2a0088a8e5cd5f935c8bfbd3710145b" +dependencies = [ + "once_cell", + "regex", +] + [[package]] name = "regex-syntax" version = "0.8.3" diff --git a/Cargo.toml b/Cargo.toml index a103469..3f721c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,13 @@ edition = "2021" [dependencies] copager_core = { path = "./crates/core" } copager_algorithm = { path = "./crates/algorithm" } +copager_algorithm_lr1 = { path = "./crates/algorithm_lr1" } +copager_cfg = { path = "./crates/cfg" } +copager_cfg_derive = { path = "./crates/cfg_derive" } +copager_lex = { path = "./crates/lex" } +copager_lex_regex = { path = "./crates/lex_regex" } +copager_ir = { path = "./crates/ir" } +copager_ir_sexp = { path = "./crates/ir_sexp" } [dev-dependencies] anyhow = { workspace = true } @@ -14,13 +21,19 @@ serde_json = "1.0.117" [features] default = [] -derive = ["copager_core/derive"] [workspace] resolver = "2" members = [ "./crates/core", "./crates/algorithm", + "./crates/algorithm_lr1", + "./crates/cfg", + "./crates/cfg_derive", + "./crates/lex", + "./crates/lex_regex", + "./crates/ir", + "./crates/ir_sexp", ] exclude = [] diff --git a/crates/algorithm_lr1/Cargo.toml b/crates/algorithm_lr1/Cargo.toml index a9a264c..0859836 100644 --- a/crates/algorithm_lr1/Cargo.toml +++ b/crates/algorithm_lr1/Cargo.toml @@ -8,4 +8,4 @@ anyhow = { workspace = true } thiserror = { workspace = true } serde = { workspace = true } itertools = "0.12.1" -copager_core = { path = "../core", features = ["derive"] } +copager_core = { path = "../core" } diff --git a/crates/cfg/Cargo.toml b/crates/cfg/Cargo.toml new file mode 100644 index 0000000..b710730 --- /dev/null +++ b/crates/cfg/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "copager_cfg" +version = "0.1.1" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } diff --git a/crates/cfg/src/lib.rs b/crates/cfg/src/lib.rs new file mode 100644 index 0000000..88dec6b --- /dev/null +++ b/crates/cfg/src/lib.rs @@ -0,0 +1,38 @@ +pub mod rule; +pub mod token; + +use std::hash::Hash; + +use rule::{Rule, RuleSet}; + +pub trait TokenKind<'a> +where + Self: Copy + Clone + Hash + Eq, +{ + fn as_str(&self) -> &'a str; + fn ignore_str() -> &'a str; + fn into_iter() -> impl Iterator; +} + +pub trait RuleKind<'a> +where + Self: Clone + Hash + Eq, +{ + type TokenKind: crate::TokenKind<'a>; + + fn into_rules(&self) -> Vec>; + fn into_iter() -> impl Iterator; + + fn into_ruleset() -> RuleSet<'a, Self::TokenKind> { + Self::into_iter() + .enumerate() + .flat_map(|(idx, elem)| { + let mut rules = Self::into_rules(&elem); + for rule in &mut rules { + 
rule.id = idx; + } + rules + }) + .collect::>() + } +} diff --git a/crates/core/src/cfg/syntax.rs b/crates/cfg/src/rule.rs similarity index 74% rename from crates/core/src/cfg/syntax.rs rename to crates/cfg/src/rule.rs index a41d085..8724ca7 100644 --- a/crates/core/src/cfg/syntax.rs +++ b/crates/cfg/src/rule.rs @@ -1,55 +1,23 @@ use std::collections::HashMap; -use std::fmt::Debug; use std::hash::Hash; use std::marker::PhantomData; -use super::token::TokenSet; - -pub trait Syntax<'a> -where - Self: Debug + Clone + Copy, -{ - type TokenSet: TokenSet<'a>; - - fn into_iter() -> impl Iterator; - fn into_rules(&self) -> Vec>; - - fn into_ruleset() -> RuleSet<'a, Self::TokenSet> { - let rules = Self::into_iter() - .enumerate() - .flat_map(|(idx, elem)| { - let mut rules = Self::into_rules(&elem); - for rule in &mut rules { - rule.id = idx; - } - rules - }) - .collect::>(); - - RuleSet::from(rules) - } -} +use crate::TokenKind; #[derive(PartialEq, Eq, Hash, Debug)] -pub struct Rule<'a, T: TokenSet<'a>> { +pub struct Rule<'a, T: TokenKind<'a>> { pub id: usize, pub lhs: RuleElem<'a, T>, pub rhs: Vec>, - tokenset: PhantomData<&'a T>, } -impl<'a, T: TokenSet<'a>> From<(RuleElem<'a, T>, Vec>)> for Rule<'a, T> { +impl<'a, T: TokenKind<'a>> From<(RuleElem<'a, T>, Vec>)> for Rule<'a, T> { fn from((lhs, rhs): (RuleElem<'a, T>, Vec>)) -> Self { - Rule { - id: 0, - lhs, - rhs, - tokenset: PhantomData, - } + Rule { id: 0, lhs, rhs } } } -impl<'a, T: TokenSet<'a>> Rule<'a, T> { +impl<'a, T: TokenKind<'a>> Rule<'a, T> { pub fn nonterms<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { let mut l_nonterms = vec![&self.lhs]; let r_nonterms: Vec<&RuleElem> = self @@ -69,14 +37,14 @@ impl<'a, T: TokenSet<'a>> Rule<'a, T> { } } -#[derive(Debug)] -pub enum RuleElem<'a, T: TokenSet<'a>> { +#[derive(Debug, Eq)] +pub enum RuleElem<'a, T: TokenKind<'a>> { NonTerm(String), - Term((T, PhantomData<&'a T>)), + Term((T, PhantomData<&'a ()>)), EOF, } -impl<'a, T: TokenSet<'a>> Hash for RuleElem<'a, T> { +impl<'a, T: TokenKind<'a>> Hash for RuleElem<'a, T> { fn hash(&self, state: &mut H) { match self { RuleElem::NonTerm(s) => s.hash(state), @@ -86,7 +54,7 @@ impl<'a, T: TokenSet<'a>> Hash for RuleElem<'a, T> { } } -impl<'a, T: TokenSet<'a>> PartialEq for RuleElem<'a, T> { +impl<'a, T: TokenKind<'a>> PartialEq for RuleElem<'a, T> { fn eq(&self, other: &Self) -> bool { match (self, other) { (RuleElem::NonTerm(s1), RuleElem::NonTerm(s2)) => s1 == s2, @@ -97,9 +65,7 @@ impl<'a, T: TokenSet<'a>> PartialEq for RuleElem<'a, T> { } } -impl<'a, T: TokenSet<'a>> Eq for RuleElem<'a, T> {} - -impl<'a, T: TokenSet<'a>> RuleElem<'a, T> { +impl<'a, T: TokenKind<'a>> RuleElem<'a, T> { pub fn new_nonterm>(t: U) -> RuleElem<'a, T> { RuleElem::NonTerm(t.into()) } @@ -110,28 +76,26 @@ impl<'a, T: TokenSet<'a>> RuleElem<'a, T> { } #[derive(Debug)] -pub struct RuleSet<'a, T: TokenSet<'a>> { +pub struct RuleSet<'a, T: TokenKind<'a>> { pub top: String, pub rules: Vec>, - tokenset: PhantomData<&'a T>, } -impl<'a, T: TokenSet<'a>> From>> for RuleSet<'a, T> { - fn from(rules: Vec>) -> Self { +impl<'a, T: TokenKind<'a>> FromIterator> for RuleSet<'a, T> { + fn from_iter(rules: I) -> Self + where + I: IntoIterator>, + { + let rules = rules.into_iter().collect::>(); let top = match &rules[0].lhs { RuleElem::NonTerm(s) => s.clone(), _ => unreachable!(), }; - - RuleSet { - top, - rules, - tokenset: PhantomData, - } + RuleSet { top, rules } } } -impl<'a, T: TokenSet<'a>> RuleSet<'a, T> { +impl<'a, T: TokenKind<'a>> RuleSet<'a, T> { pub fn nonterms<'b>(&'b self) -> 
Vec<&'b RuleElem<'a, T>> { self.rules.iter().flat_map(|rule| rule.nonterms()).collect() } @@ -233,9 +197,11 @@ impl<'a, T: TokenSet<'a>> RuleSet<'a, T> { mod test { use std::collections::HashMap; - use super::{TokenSet, Syntax, Rule, RuleElem}; + use crate::{TokenKind, RuleKind}; - #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] + use super::{Rule, RuleElem}; + + #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] enum TestToken { Num, Plus, @@ -246,23 +212,8 @@ mod test { BracketB, } - impl TokenSet<'_> for TestToken { - fn into_iter() -> impl Iterator { - Box::new( - vec![ - TestToken::Num, - TestToken::Plus, - TestToken::Minus, - TestToken::Mul, - TestToken::Div, - TestToken::BracketA, - TestToken::BracketB, - ] - .into_iter(), - ) - } - - fn into_regex_str(&self) -> &'static str { + impl TokenKind<'_> for TestToken { + fn as_str(&self) -> &'static str { match self { TestToken::Num => r"^[1-9][0-9]*", TestToken::Plus => r"^\+", @@ -277,10 +228,23 @@ mod test { fn ignore_str() -> &'static str { r"^[ \t\n]+" } + + fn into_iter() -> impl Iterator { + vec![ + TestToken::Num, + TestToken::Plus, + TestToken::Minus, + TestToken::Mul, + TestToken::Div, + TestToken::BracketA, + TestToken::BracketB, + ] + .into_iter() + } } - #[derive(Debug, Clone, Copy)] - enum TestSyntax { + #[derive(Debug, Clone, Hash, PartialEq, Eq)] + enum TestRule { ExprPlus, ExprMinus, Expr2Term, @@ -291,26 +255,26 @@ mod test { Fact2Num, } - impl<'a> Syntax<'a> for TestSyntax { - type TokenSet = TestToken; + impl<'a> RuleKind<'a> for TestRule { + type TokenKind = TestToken; fn into_iter() -> impl Iterator { Box::new( vec![ - TestSyntax::ExprPlus, - TestSyntax::ExprMinus, - TestSyntax::Expr2Term, - TestSyntax::TermMul, - TestSyntax::TermDiv, - TestSyntax::Term2Fact, - TestSyntax::Fact2Expr, - TestSyntax::Fact2Num, + TestRule::ExprPlus, + TestRule::ExprMinus, + TestRule::Expr2Term, + TestRule::TermMul, + TestRule::TermDiv, + TestRule::Term2Fact, + TestRule::Fact2Expr, + TestRule::Fact2Num, ] .into_iter(), ) } - fn into_rules(&self) -> Vec> { + fn into_rules(&self) -> Vec> { let expr_plus = Rule::from(( RuleElem::new_nonterm("expr"), vec![ @@ -369,14 +333,14 @@ mod test { let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![])); match self { - TestSyntax::ExprPlus => vec![expr_plus], - TestSyntax::ExprMinus => vec![expr_minus], - TestSyntax::Expr2Term => vec![expr_2_term], - TestSyntax::TermMul => vec![term_mul], - TestSyntax::TermDiv => vec![term_div], - TestSyntax::Term2Fact => vec![term_2_fact], - TestSyntax::Fact2Expr => vec![fact_2_expr], - TestSyntax::Fact2Num => vec![fact_2_num], + TestRule::ExprPlus => vec![expr_plus], + TestRule::ExprMinus => vec![expr_minus], + TestRule::Expr2Term => vec![expr_2_term], + TestRule::TermMul => vec![term_mul], + TestRule::TermDiv => vec![term_div], + TestRule::Term2Fact => vec![term_2_fact], + TestRule::Fact2Expr => vec![fact_2_expr], + TestRule::Fact2Num => vec![fact_2_num], } } } @@ -404,7 +368,7 @@ mod test { #[test] fn first_set() { - let ruleset = ::into_ruleset(); + let ruleset = ::into_ruleset(); let first_set = ruleset.first_set(); check( @@ -426,3 +390,4 @@ mod test { check(&first_set, "fact", vec![TestToken::BracketA]); } } + diff --git a/crates/cfg/src/token.rs b/crates/cfg/src/token.rs new file mode 100644 index 0000000..8e0b4be --- /dev/null +++ b/crates/cfg/src/token.rs @@ -0,0 +1,27 @@ +use std::marker::PhantomData; + +use crate::TokenKind; + +#[derive(Debug, Copy, Clone)] +pub struct Token<'a, 'b, T: TokenKind<'a>> { + pub kind: T, + pub src: &'b str, + 
pub range: (usize, usize), + _phantom: PhantomData<&'a ()>, +} + +impl<'a, 'b, T: TokenKind<'a>> Token<'a, 'b, T> { + pub fn new(kind: T, src: &'b str, range: (usize, usize)) -> Self { + Token { + kind, + src, + range, + _phantom: PhantomData, + } + } + + pub fn as_str(&self) -> &'b str { + let (l, r) = self.range; + &self.src[l..r] + } +} diff --git a/crates/core_derive/Cargo.toml b/crates/cfg_derive/Cargo.toml similarity index 89% rename from crates/core_derive/Cargo.toml rename to crates/cfg_derive/Cargo.toml index 8e362b2..810a51d 100644 --- a/crates/core_derive/Cargo.toml +++ b/crates/cfg_derive/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "copager_core_derive" +name = "copager_cfg_derive" version = "0.1.1" edition = "2021" diff --git a/crates/cfg_derive/src/impl.rs b/crates/cfg_derive/src/impl.rs new file mode 100644 index 0000000..f89ee34 --- /dev/null +++ b/crates/cfg_derive/src/impl.rs @@ -0,0 +1,2 @@ +pub mod token; +pub mod rule; diff --git a/crates/core_derive/src/impl/syntax.rs b/crates/cfg_derive/src/impl/rule.rs similarity index 91% rename from crates/core_derive/src/impl/syntax.rs rename to crates/cfg_derive/src/impl/rule.rs index 83324ce..dd0eff9 100644 --- a/crates/core_derive/src/impl/syntax.rs +++ b/crates/cfg_derive/src/impl/rule.rs @@ -2,11 +2,11 @@ use proc_macro2::TokenStream; use quote::quote; use syn::{Data, DeriveInput, Variant, Ident, LitStr}; -pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream { +pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { let data_enum = if let Data::Enum(data_enum) = ast.data { data_enum } else { - panic!("\"Syntax\" proc-macro is only implemented for enum.") + panic!("\"RuleKind\" proc-macro is only implemented for enum.") }; let parsed_variantes = data_enum @@ -17,7 +17,7 @@ pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream { let enum_name = &ast.ident; let enum_assoc_type = format!("{}", enum_name) - .replace("Syntax", "TokenSet") + .replace("Rule", "Token") .parse::() .unwrap(); let enum_variants = parsed_variantes @@ -29,20 +29,20 @@ pub fn syntax_proc_macro_impl(ast: DeriveInput) -> TokenStream { quote! { impl<'a> Syntax<'a> for #enum_name { - type TokenSet = #enum_assoc_type; + type TokenKind = #enum_assoc_type; - fn into_iter() -> impl Iterator { - vec![ - #( #enum_variants, )* - ].into_iter() - } - - fn into_rules(&self) -> Vec> { + fn into_rules(&self) -> Vec> { match self { #( #enum_rule_table, )* _ => unimplemented!(), } } + + fn into_iter() -> impl Iterator { + vec![ + #( #enum_variants, )* + ].into_iter() + } } } } diff --git a/crates/core_derive/src/impl/tokenset.rs b/crates/cfg_derive/src/impl/token.rs similarity index 91% rename from crates/core_derive/src/impl/tokenset.rs rename to crates/cfg_derive/src/impl/token.rs index 1cd4df7..f092ab3 100644 --- a/crates/core_derive/src/impl/tokenset.rs +++ b/crates/cfg_derive/src/impl/token.rs @@ -6,7 +6,7 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { let data_enum = if let Data::Enum(data_enum) = ast.data { data_enum } else { - panic!("\"Tokenset\" proc-macro is only implemented for enum.") + panic!("\"TokenKind\" proc-macro is only implemented for enum.") }; let parsed_variantes = data_enum @@ -31,7 +31,14 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { .map(|variant| variant.gen_ident_with_regex()); quote! 
{ - impl TokenSet<'_> for #enum_name { + impl TokenKind<'_> for #enum_name { + fn as_str(&self) -> &'static str { + match self { + #( #enum_regex_table, )* + _ => unimplemented!(), + } + } + fn ignore_str() -> &'static str { #enum_ignored } @@ -41,13 +48,6 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { #( #enum_variants, )* ].into_iter() } - - fn into_regex_str(&self) -> &'static str { - match self { - #( #enum_regex_table, )* - _ => unimplemented!(), - } - } } } } @@ -68,8 +68,8 @@ impl<'a> VariantInfo<'a> { let mut ignored = false; for attr in &variant.attrs { let _ = attr.parse_nested_meta(|meta| { - // #[...(regex = "...")] - if meta.path.is_ident("regex") { + // #[...(text = "...")] + if meta.path.is_ident("text") { let raw_regex = meta.value()?.parse::()?.value(); regex = Some(format!("^{}", raw_regex)); return Ok(()); diff --git a/crates/core_derive/src/lib.rs b/crates/cfg_derive/src/lib.rs similarity index 62% rename from crates/core_derive/src/lib.rs rename to crates/cfg_derive/src/lib.rs index 9bdfd72..a1bb051 100644 --- a/crates/core_derive/src/lib.rs +++ b/crates/cfg_derive/src/lib.rs @@ -2,14 +2,14 @@ mod r#impl; use syn::{parse_macro_input, DeriveInput}; -#[proc_macro_derive(TokenSet, attributes(token))] +#[proc_macro_derive(TokenKind, attributes(token))] pub fn derive_tokenset(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let ast = parse_macro_input!(input as DeriveInput); - r#impl::tokenset::proc_macro_impl(ast).into() + r#impl::token::proc_macro_impl(ast).into() } -#[proc_macro_derive(Syntax, attributes(rule))] +#[proc_macro_derive(RuleKind, attributes(rule))] pub fn derive_syntax(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let ast = parse_macro_input!(input as DeriveInput); - r#impl::syntax::syntax_proc_macro_impl(ast).into() + r#impl::rule::proc_macro_impl(ast).into() } diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 6e3804a..078ce68 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -6,10 +6,6 @@ edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } -serde = { workspace = true, features = ["derive"]} -regex = { workspace = true } -copager_core_derive = { path = "../core_derive", optional = true } - -[features] -default = [] -derive = ["dep:copager_core_derive"] +serde = { workspace = true } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } diff --git a/crates/core/src/cfg.rs b/crates/core/src/cfg.rs deleted file mode 100644 index e3b7f8d..0000000 --- a/crates/core/src/cfg.rs +++ /dev/null @@ -1,8 +0,0 @@ -mod token; -mod syntax; - -#[cfg(feature = "derive")] -pub use copager_core_derive::{TokenSet, Syntax}; - -pub use token::TokenSet; -pub use syntax::{Syntax, Rule, RuleElem, RuleSet}; diff --git a/crates/core/src/cfg/token.rs b/crates/core/src/cfg/token.rs deleted file mode 100644 index e3a6d3f..0000000 --- a/crates/core/src/cfg/token.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::fmt::Debug; -use std::hash::Hash; - -use regex::{Regex, RegexSet}; - -pub trait TokenSet<'a> -where - Self: Debug + Copy + Clone + Hash + Eq, -{ - fn ignore_str() -> &'a str; - fn into_iter() -> impl Iterator; - fn into_regex_str(&self) -> &'a str; - - fn into_regex(&self) -> anyhow::Result { - Ok(Regex::new(self.into_regex_str())?) - } - - fn try_into_regexset() -> anyhow::Result { - let regex_set = Self::into_iter() - .map(|token| Self::into_regex_str(&token)) - .collect::>(); - - Ok(RegexSet::new(regex_set)?) 
- } -} diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index 7f559fc..16a3539 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -4,8 +4,8 @@ use std::fmt::Display; use thiserror::Error; -use crate::cfg::TokenSet; -use crate::lex::Token; +use copager_cfg::token::Token; +use copager_cfg::TokenKind; #[derive(Debug, Error)] pub struct ParseError { @@ -32,7 +32,7 @@ impl ParseError { } } - pub fn with<'a, T: TokenSet<'a>>(self, token: Token<'a, '_, T>) -> ParseError { + pub fn with<'a, T: TokenKind<'a>>(self, token: Token<'a, '_, T>) -> ParseError { let mut sum = 0; let (mut rows, mut cols) = (1, 1); for c in token.src.chars() { diff --git a/crates/core/src/lex.rs b/crates/core/src/lex.rs deleted file mode 100644 index 240da9f..0000000 --- a/crates/core/src/lex.rs +++ /dev/null @@ -1,184 +0,0 @@ -use std::marker::PhantomData; - -use regex::{Regex, RegexSet}; - -use crate::cfg::TokenSet; - -#[derive(Debug, Copy, Clone)] -pub struct Token<'a, 'b, T: TokenSet<'a>> { - pub kind: T, - pub src: &'b str, - pub range: (usize, usize), - tokenset: PhantomData<&'a T>, -} - -impl<'a, 'b, T: TokenSet<'a>> Token<'a, 'b, T> { - pub fn new(kind: T, src: &'b str, range: (usize, usize)) -> Self { - Token { - kind, - src, - range, - tokenset: PhantomData, - } - } - - pub fn as_str(&self) -> &'b str { - let (l, r) = self.range; - &self.src[l..r] - } -} - -pub(crate) struct Lexer; - -impl Lexer { - pub fn new<'a, 'b, T>(input: &'b str) -> anyhow::Result>> - where - T: TokenSet<'a> + 'a, - { - LexDriver::<'a, 'b, T>::try_from(input) - } -} - -struct LexDriver<'a, 'b, T: TokenSet<'a>> { - // Regex - regex_istr: Regex, - regex_set: RegexSet, - regex_map: Vec<(Regex, T)>, - - // State - input: &'b str, - pos: usize, - - // PhantomData - tokenset: PhantomData<&'a T>, -} - -impl<'a, 'b, T: TokenSet<'a>> TryFrom<&'b str> for LexDriver<'a, 'b, T> { - type Error = anyhow::Error; - - fn try_from(input: &'b str) -> anyhow::Result { - let regex_istr = Regex::new(T::ignore_str())?; - let regex_set = T::try_into_regexset()?; - let regex_map = T::into_iter() - .map(|token| Ok((token.into_regex()?, token))) - .collect::>>()?; - - Ok(LexDriver { - regex_istr, - regex_set, - regex_map, - input, - pos: 0, - tokenset: PhantomData, - }) - } -} - -impl<'a, 'b, T: TokenSet<'a> + 'a> Iterator for LexDriver<'a, 'b, T> { - type Item = Token<'a, 'b, T>; - - fn next(&mut self) -> Option { - // Skip Spaces - let remain = match self.regex_istr.find(&self.input[self.pos..]) { - Some(acc_s) => { - self.pos += acc_s.len(); - &self.input[self.pos..] - } - None => &self.input[self.pos..] 
- }; - - // Find the token - let mut matches = self - .regex_set - .matches(remain) - .into_iter() - .map(|idx| &self.regex_map[idx]) - .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str())) - .collect::>(); - matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); - - // Update myself - let (token, acc_s) = matches.first()?; - let range = (self.pos, self.pos + acc_s.len()); - self.pos += acc_s.len(); - - Some(Token::new(*token, &self.input, range)) - } -} - -#[cfg(test)] -mod test { - use serde::{Deserialize, Serialize}; - - use crate::cfg::TokenSet; - use super::Lexer; - - #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)] - enum TestToken { - Num, - Plus, - } - - impl TokenSet<'_> for TestToken { - fn ignore_str() -> &'static str { - r"^[ \t\n]+" - } - - fn into_iter() -> impl Iterator { - vec![TestToken::Num, TestToken::Plus].into_iter() - } - - fn into_regex_str(&self) -> &'static str { - match self { - TestToken::Num => r"^[1-9][0-9]*", - TestToken::Plus => r"^\+", - } - } - } - - fn check<'a, 'b>( - expected: &Vec<(TestToken, &'b str, (usize, usize))>, - input: &'b str, - ) -> bool { - Lexer::new::(input) - .unwrap() - .into_iter() - .zip(expected.iter()) - .all(|(a, b)| { - a.kind == b.0 && a.range == b.2 && a.as_str() == b.1 - }) - } - - #[test] - fn input_ok_1() { - let expected = vec![ - (TestToken::Num, "10", (0, 2)), - (TestToken::Plus, "+", (2, 3)), - (TestToken::Num, "20", (3, 5)), - ]; - let input = "10+20"; - assert!(check(&expected, input)); - } - - #[test] - fn input_ok_2() { - let expected = vec![ - (TestToken::Num, "10", (12, 14)), - (TestToken::Plus, "+", (15, 16)), - (TestToken::Num, "20", (23, 25)), - ]; - let input = " 10 +\n 20 "; - assert!(check(&expected, input)); - } - - #[test] - fn input_ok_3() { - let expected = vec![ - (TestToken::Num, "10", (12, 14)), - (TestToken::Plus, "+", (15, 16)), - (TestToken::Num, "20", (23, 25)), - ]; - let input = " 10 +\n 20ffff30 - 40 * 50"; - assert!(check(&expected, input)); - } -} diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 78bc5b5..0c5e3bf 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -1,14 +1,10 @@ -pub mod cfg; pub mod error; -pub mod parse; -pub mod lex; use std::marker::PhantomData; use serde::{Serialize, Deserialize}; -use lex::Lexer; -use parse::{ParserImpl, SExp}; +use copager_lex::Lexer; #[derive(Debug, Serialize, Deserialize)] pub struct Parser<'a, Algorithm> diff --git a/crates/core_derive/src/impl.rs b/crates/core_derive/src/impl.rs deleted file mode 100644 index dd500e0..0000000 --- a/crates/core_derive/src/impl.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod tokenset; -pub mod syntax; diff --git a/crates/ir/Cargo.toml b/crates/ir/Cargo.toml new file mode 100644 index 0000000..c9af348 --- /dev/null +++ b/crates/ir/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "copager_ir" +version = "0.1.1" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs new file mode 100644 index 0000000..7c3b4b4 --- /dev/null +++ b/crates/ir/src/lib.rs @@ -0,0 +1,18 @@ +use copager_cfg::{RuleKind, TokenKind}; + +pub trait IR<'a, T, R> +where + T: TokenKind<'a>, + R: RuleKind<'a, TokenKind = T>, +{ + type Builder: IRBuilder<'a>; +} + +pub trait IRBuilder<'a> { + type TokenKind: TokenKind<'a>; + type RuleKind: RuleKind<'a, TokenKind = Self::TokenKind>; + type Output: IR<'a, Self::TokenKind, Self::RuleKind>; + + fn new() -> 
Self; + fn build(self) -> anyhow::Result; +} diff --git a/crates/ir_sexp/Cargo.toml b/crates/ir_sexp/Cargo.toml new file mode 100644 index 0000000..de30731 --- /dev/null +++ b/crates/ir_sexp/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "copager_ir_sexp" +version = "0.1.1" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } +copager_ir = { path = "../ir" } diff --git a/crates/core/src/parse.rs b/crates/ir_sexp/src/lib.rs similarity index 51% rename from crates/core/src/parse.rs rename to crates/ir_sexp/src/lib.rs index 8a2d4df..c2dc50d 100644 --- a/crates/core/src/parse.rs +++ b/crates/ir_sexp/src/lib.rs @@ -1,27 +1,14 @@ -use std::fmt::{Display, Debug}; +use std::fmt::{Debug, Display}; -use crate::cfg::{TokenSet, Syntax}; -use crate::lex::Token; - -pub trait ParserImpl<'a> -where - Self: Sized, -{ - type TokenSet: TokenSet<'a> + 'a; - type Syntax: Syntax<'a, TokenSet = Self::TokenSet>; - - fn setup() -> anyhow::Result; - fn parse<'b>( - &self, - lexer: impl Iterator>, - ) -> anyhow::Result>; -} +use copager_cfg::token::Token; +use copager_cfg::{RuleKind, TokenKind}; +use copager_ir::{IR, IRBuilder}; #[derive(Debug)] pub enum SExp<'a, 'b, T, S> where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, + T: TokenKind<'a> + 'a, + S: RuleKind<'a, TokenKind = T>, { List { tag: S, @@ -32,8 +19,8 @@ where impl<'a, T, S> Display for SExp<'a, '_, T, S> where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T> + Debug, + T: TokenKind<'a> + 'a, + S: RuleKind<'a, TokenKind = T> + Debug, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -49,38 +36,56 @@ where } } +impl<'a, 'b, T, R> IR<'a, T, R> for SExp<'a, 'b, T, R> +where + T: TokenKind<'a> + 'a, + R: RuleKind<'a, TokenKind = T>, +{ + type Builder = SExpBuilder<'a, 'b, T, R>; +} + #[derive(Debug)] -pub struct SExpBuilder<'a, 'b, T, S> +pub struct SExpBuilder<'a, 'b, T, R> where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, + T: TokenKind<'a> + 'a, + R: RuleKind<'a, TokenKind = T>, { - stack: Vec>, + stack: Vec>, } -impl<'a, 'b, T, S> SExpBuilder<'a, 'b, T, S> +impl <'a, 'b, T, R> IRBuilder<'a> for SExpBuilder<'a, 'b, T, R> where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, + T: TokenKind<'a> + 'a, + R: RuleKind<'a, TokenKind = T>, { - pub fn new() -> SExpBuilder<'a, 'b, T, S> { + type TokenKind = T; + type RuleKind = R; + type Output = SExp<'a, 'b, T, R>; + + fn new() -> SExpBuilder<'a, 'b, T, R> { SExpBuilder { stack: vec![] } } + fn build(mut self) -> anyhow::Result> { + if self.stack.len() == 1 { + Ok(self.stack.pop().unwrap()) + } else { + Err(anyhow::anyhow!("Invalid S-Expression")) + } + } +} + +impl<'a, 'b, T, R> SExpBuilder<'a, 'b, T, R> +where + T: TokenKind<'a> + 'a, + R: RuleKind<'a, TokenKind = T>, +{ pub fn push(&mut self, token: Token<'a, 'b, T>) { self.stack.push(SExp::Atom(token)); } - pub fn wrap(&mut self, tag: S, cnt: usize) { + pub fn wrap(&mut self, tag: R, cnt: usize) { let elems = self.stack.split_off(self.stack.len() - cnt); self.stack.push(SExp::List { tag, elems }); } - - pub fn build(mut self) -> anyhow::Result> { - if self.stack.len() == 1 { - Ok(self.stack.pop().unwrap()) - } else { - Err(anyhow::anyhow!("Invalid S-Expression")) - } - } } diff --git a/crates/lex/Cargo.toml b/crates/lex/Cargo.toml new file mode 100644 index 0000000..ed72c3a --- /dev/null +++ b/crates/lex/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "copager_lex" +version = "0.1.1" +edition = "2021" + 
+[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs new file mode 100644 index 0000000..b3e14f5 --- /dev/null +++ b/crates/lex/src/lib.rs @@ -0,0 +1,36 @@ +use std::marker::PhantomData; + +use copager_cfg::token::Token; +use copager_cfg::TokenKind; + +pub struct Lexer<'a, 'b, T, I> +where + T: TokenKind<'a>, + I: LexIterator<'a, 'b>, +{ + _phantom_t: PhantomData<&'a T>, + _phantom_b: PhantomData<&'b str>, + _phantom_itr: PhantomData, +} + +impl<'a, 'b, T, I> Lexer<'a, 'b, T, I> +where + T: TokenKind<'a>, + I: LexIterator<'a, 'b>, +{ + pub fn new(input: &'b str) -> anyhow::Result> + where + T: TokenKind<'a> + 'a, + { + I::try_from(input) + } +} + +pub trait LexIterator<'a, 'b> +where + Self: Sized + TryFrom<&'b str, Error = anyhow::Error>, +{ + type TokenKind: TokenKind<'a>; + + fn next(&mut self) -> Option>; +} diff --git a/crates/lex_regex/Cargo.toml b/crates/lex_regex/Cargo.toml new file mode 100644 index 0000000..7bf5954 --- /dev/null +++ b/crates/lex_regex/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "copager_lex_regex" +version = "0.1.1" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +regex = "1.10.4" +regex-macro = "0.2.0" +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs new file mode 100644 index 0000000..edcf6af --- /dev/null +++ b/crates/lex_regex/src/lib.rs @@ -0,0 +1,77 @@ +use std::marker::PhantomData; + +use regex::{Regex, RegexSet}; + +use copager_cfg::token::Token; +use copager_cfg::TokenKind; +use copager_lex::LexIterator; + +struct RegexLexer<'a, 'b, T: TokenKind<'a>> { + // Regex + regex_istr: Regex, + regex_set: RegexSet, + regex_map: Vec<(Regex, T)>, + + // State + input: &'b str, + pos: usize, + + // PhantomData + _phantom: PhantomData<&'a T>, +} + +impl<'a, 'b, T: TokenKind<'a>> TryFrom<&'b str> for RegexLexer<'a, 'b, T> { + type Error = anyhow::Error; + + fn try_from(input: &'b str) -> anyhow::Result { + let regex_istr = Regex::new(T::ignore_str())?; + let regex_set = T::into_iter() + .map(|token| T::as_str(&token)) + .collect::>(); + let regex_set = RegexSet::new(regex_set)?; + let regex_map = T::into_iter() + .map(|token| Ok((Regex::new(token.as_str())?, token))) + .collect::>>()?; + + Ok(RegexLexer { + regex_istr, + regex_set, + regex_map, + input, + pos: 0, + _phantom: PhantomData, + }) + } +} + +impl<'a, 'b, T: TokenKind<'a> + 'a> LexIterator<'a, 'b> for RegexLexer<'a, 'b, T> { + type TokenKind = T; + + fn next(&mut self) -> Option> { + // Skip Spaces + let remain = match self.regex_istr.find(&self.input[self.pos..]) { + Some(acc_s) => { + self.pos += acc_s.len(); + &self.input[self.pos..] + } + None => &self.input[self.pos..] 
+ }; + + // Find the token + let mut matches = self + .regex_set + .matches(remain) + .into_iter() + .map(|idx| &self.regex_map[idx]) + .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str())) + .collect::>(); + matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); + + // Update myself + let (token, acc_s) = matches.first()?; + let range = (self.pos, self.pos + acc_s.len()); + self.pos += acc_s.len(); + + Some(Token::new(*token, &self.input, range)) + } +} diff --git a/src/lib.rs b/src/lib.rs index ca8197e..13023d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,4 @@ pub use copager_core::*; +pub use copager_cfg as cfg; +pub use copager_lex as lex; pub use copager_algorithm as algorithm; From 3c7204d596255ea0e04e3918a3c6590b3f4668bf Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Tue, 27 Aug 2024 00:49:32 +0900 Subject: [PATCH 03/55] =?UTF-8?q?[wip]=20algoriothm=5F*=20=E2=86=92=20pars?= =?UTF-8?q?e=5F*?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 40 +++++++++---------- Cargo.toml | 8 ++-- crates/algorithm/Cargo.toml | 7 ---- crates/parse/Cargo.toml | 7 ++++ crates/{algorithm => parse}/src/lib.rs | 0 .../{algorithm_lr1 => parse_lr1}/Cargo.toml | 2 +- .../src/builder.rs | 0 .../src/driver.rs | 0 .../{algorithm_lr1 => parse_lr1}/src/error.rs | 0 .../{algorithm_lr1 => parse_lr1}/src/lib.rs | 0 10 files changed, 32 insertions(+), 32 deletions(-) delete mode 100644 crates/algorithm/Cargo.toml create mode 100644 crates/parse/Cargo.toml rename crates/{algorithm => parse}/src/lib.rs (100%) rename crates/{algorithm_lr1 => parse_lr1}/Cargo.toml (87%) rename crates/{algorithm_lr1 => parse_lr1}/src/builder.rs (100%) rename crates/{algorithm_lr1 => parse_lr1}/src/driver.rs (100%) rename crates/{algorithm_lr1 => parse_lr1}/src/error.rs (100%) rename crates/{algorithm_lr1 => parse_lr1}/src/lib.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 92e1c8d..8f120d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,8 +22,6 @@ name = "copager" version = "0.1.1" dependencies = [ "anyhow", - "copager_algorithm", - "copager_algorithm_lr1", "copager_cfg", "copager_cfg_derive", "copager_core", @@ -31,28 +29,12 @@ dependencies = [ "copager_ir_sexp", "copager_lex", "copager_lex_regex", + "copager_parse", + "copager_parse_lr1", "serde", "serde_json", ] -[[package]] -name = "copager_algorithm" -version = "0.1.1" -dependencies = [ - "copager_algorithm_lr1", -] - -[[package]] -name = "copager_algorithm_lr1" -version = "0.1.1" -dependencies = [ - "anyhow", - "copager_core", - "itertools", - "serde", - "thiserror", -] - [[package]] name = "copager_cfg" version = "0.1.1" @@ -123,6 +105,24 @@ dependencies = [ "thiserror", ] +[[package]] +name = "copager_parse" +version = "0.1.1" +dependencies = [ + "copager_parse_lr1", +] + +[[package]] +name = "copager_parse_lr1" +version = "0.1.1" +dependencies = [ + "anyhow", + "copager_core", + "itertools", + "serde", + "thiserror", +] + [[package]] name = "either" version = "1.13.0" diff --git a/Cargo.toml b/Cargo.toml index 3f721c9..9f6d568 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,12 +5,12 @@ edition = "2021" [dependencies] copager_core = { path = "./crates/core" } -copager_algorithm = { path = "./crates/algorithm" } -copager_algorithm_lr1 = { path = "./crates/algorithm_lr1" } copager_cfg = { path = "./crates/cfg" } copager_cfg_derive = { path = "./crates/cfg_derive" } copager_lex = { path = "./crates/lex" } copager_lex_regex = { path = "./crates/lex_regex" } +copager_parse = { path = 
"./crates/parse" } +copager_parse_lr1 = { path = "./crates/parse_lr1" } copager_ir = { path = "./crates/ir" } copager_ir_sexp = { path = "./crates/ir_sexp" } @@ -26,12 +26,12 @@ default = [] resolver = "2" members = [ "./crates/core", - "./crates/algorithm", - "./crates/algorithm_lr1", "./crates/cfg", "./crates/cfg_derive", "./crates/lex", "./crates/lex_regex", + "./crates/parse", + "./crates/parse_lr1", "./crates/ir", "./crates/ir_sexp", ] diff --git a/crates/algorithm/Cargo.toml b/crates/algorithm/Cargo.toml deleted file mode 100644 index c0f01bd..0000000 --- a/crates/algorithm/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "copager_algorithm" -version = "0.1.1" -edition = "2021" - -[dependencies] -lr1 = { package = "copager_algorithm_lr1", path = "../algorithm_lr1" } diff --git a/crates/parse/Cargo.toml b/crates/parse/Cargo.toml new file mode 100644 index 0000000..52a0d88 --- /dev/null +++ b/crates/parse/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "copager_parse" +version = "0.1.1" +edition = "2021" + +[dependencies] +lr1 = { package = "copager_parse_lr1", path = "../parse_lr1" } diff --git a/crates/algorithm/src/lib.rs b/crates/parse/src/lib.rs similarity index 100% rename from crates/algorithm/src/lib.rs rename to crates/parse/src/lib.rs diff --git a/crates/algorithm_lr1/Cargo.toml b/crates/parse_lr1/Cargo.toml similarity index 87% rename from crates/algorithm_lr1/Cargo.toml rename to crates/parse_lr1/Cargo.toml index 0859836..05b7c16 100644 --- a/crates/algorithm_lr1/Cargo.toml +++ b/crates/parse_lr1/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "copager_algorithm_lr1" +name = "copager_parse_lr1" version = "0.1.1" edition = "2021" diff --git a/crates/algorithm_lr1/src/builder.rs b/crates/parse_lr1/src/builder.rs similarity index 100% rename from crates/algorithm_lr1/src/builder.rs rename to crates/parse_lr1/src/builder.rs diff --git a/crates/algorithm_lr1/src/driver.rs b/crates/parse_lr1/src/driver.rs similarity index 100% rename from crates/algorithm_lr1/src/driver.rs rename to crates/parse_lr1/src/driver.rs diff --git a/crates/algorithm_lr1/src/error.rs b/crates/parse_lr1/src/error.rs similarity index 100% rename from crates/algorithm_lr1/src/error.rs rename to crates/parse_lr1/src/error.rs diff --git a/crates/algorithm_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs similarity index 100% rename from crates/algorithm_lr1/src/lib.rs rename to crates/parse_lr1/src/lib.rs From 487ea857de85ee0a79fe906fa9c0ea042e5a8de9 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Wed, 28 Aug 2024 20:05:56 +0900 Subject: [PATCH 04/55] =?UTF-8?q?[add]=20utils=20=E3=82=AF=E3=83=AC?= =?UTF-8?q?=E3=83=BC=E3=83=88=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Cacheable トレイト --- crates/utils/Cargo.toml | 8 ++++++++ crates/utils/src/cache.rs | 9 +++++++++ crates/utils/src/lib.rs | 1 + 3 files changed, 18 insertions(+) create mode 100644 crates/utils/Cargo.toml create mode 100644 crates/utils/src/cache.rs create mode 100644 crates/utils/src/lib.rs diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml new file mode 100644 index 0000000..0f040b9 --- /dev/null +++ b/crates/utils/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "copager_utils" +version = "0.1.1" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } diff --git a/crates/utils/src/cache.rs b/crates/utils/src/cache.rs new file mode 100644 index 0000000..1f42e4c --- /dev/null +++ b/crates/utils/src/cache.rs @@ -0,0 +1,9 
From 9f3d7e5da506d63d2048e650e1ca3ad921af04ac Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Wed, 28 Aug 2024 20:07:18 +0900
Subject: [PATCH 05/55] [change] Change the LexIterator interface & add
 LexSource
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For now, LexIterator is simply forced to implement Cacheable.
---
 crates/lex/Cargo.toml | 1 +
 crates/lex/src/lib.rs | 59 ++++++++++++++++++---------
 crates/lex_regex/Cargo.toml | 1 +
 crates/lex_regex/src/lib.rs | 80 ++++++++++++++++++++++-------------
 4 files changed, 95 insertions(+), 46 deletions(-)

diff --git a/crates/lex/Cargo.toml b/crates/lex/Cargo.toml
index ed72c3a..6e5eeb8 100644
--- a/crates/lex/Cargo.toml
+++ b/crates/lex/Cargo.toml
@@ -7,3 +7,4 @@
 anyhow = { workspace = true }
 thiserror = { workspace = true }
 copager_cfg = { path = "../cfg" }
+copager_utils = { path = "../utils" }
diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs
index b3e14f5..02f8dec 100644
--- a/crates/lex/src/lib.rs
+++ b/crates/lex/src/lib.rs
@@ -1,36 +1,59 @@
 use std::marker::PhantomData;

-use copager_cfg::token::Token;
-use copager_cfg::TokenKind;
+use copager_cfg::token::{TokenTag, Token};
+use copager_utils::cache::Cacheable;

-pub struct Lexer<'a, 'b, T, I>
+pub struct Lexer<'cache, 'input, T, S, I>
 where
-    T: TokenKind<'a>,
-    I: LexIterator<'a, 'b>,
+    T: TokenTag,
+    S: LexSource<T>,
+    I: LexIterator<'cache, 'input, T, S>,
 {
-    _phantom_t: PhantomData<&'a T>,
-    _phantom_b: PhantomData<&'b str>,
-    _phantom_itr: PhantomData<I>,
+    cache: I::Cache,
+    _phantom_s: PhantomData<S>,
+    _phantom_t: PhantomData<&'input T>,
 }

-impl<'a, 'b, T, I> Lexer<'a, 'b, T, I>
+impl<'cache, 'input, T, S, I> Lexer<'cache, 'input, T, S, I>
 where
-    T: TokenKind<'a>,
-    I: LexIterator<'a, 'b>,
+    T: TokenTag,
+    S: LexSource<T>,
+    I: LexIterator<'cache, 'input, T, S>,
 {
-    pub fn new(input: &'b str) -> anyhow::Result<I>
+    pub fn new() -> anyhow::Result<Self>
     where
-        T: TokenKind<'a> + 'a,
+        S: Default,
     {
-        I::try_from(input)
+        Self::try_from(S::default())
+    }
+
+    pub fn try_from(source: S) -> anyhow::Result<Self> {
+        Ok(Lexer {
+            cache: I::new(source)?,
+            _phantom_s: PhantomData,
+            _phantom_t: PhantomData,
+        })
+    }
+
+    pub fn iter(&'cache self, input: &'input str) -> I {
+        I::restore(&self.cache).init(input)
+    }
 }

-pub trait LexIterator<'a, 'b>
+pub trait LexSource<T>
 where
-    Self: Sized + TryFrom<&'b str, Error = anyhow::Error>,
+    T: TokenTag,
 {
-    type TokenKind: TokenKind<'a>;
+    fn ignore_token(&self) -> &str;
+    fn iter(&self) -> impl Iterator<Item = T>;
+}

-    fn next(&mut self) -> Option<Token<'a, 'b, Self::TokenKind>>;
+pub trait LexIterator<'cache, 'input, T, S>
+where
+    Self: Sized + Cacheable<'cache, S>,
+    T: TokenTag,
+    S: LexSource<T>,
+{
+    fn init(&self, input: &'input str) -> Self;
+    fn next(&mut self) -> Option<Token<'input, T>>;
 }
diff --git a/crates/lex_regex/Cargo.toml b/crates/lex_regex/Cargo.toml
index 7bf5954..40cc31d 100644
--- a/crates/lex_regex/Cargo.toml
+++ b/crates/lex_regex/Cargo.toml
@@ -10,3 +10,4 @@
 regex = "1.10.4"
 regex-macro = "0.2.0"
 copager_cfg = { path = "../cfg" }
 copager_lex = { path = "../lex" }
+copager_utils = { path = "../utils" }
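Note on the split above: Lexer::try_from runs Cacheable::new once to build I::Cache, and every later call to iter() restores a fresh per-input iterator that only borrows that cache. A hypothetical generic consumer, with bounds assumed from the traits in crates/lex/src/lib.rs as of this patch (LexSource's type parameter, garbled in the extracted diff, is written here as LexSource<T>):

    use copager_cfg::token::TokenTag;
    use copager_lex::{Lexer, LexIterator, LexSource};

    fn dump_tokens<'cache, 'input, T, S, I>(
        lexer: &'cache Lexer<'cache, 'input, T, S, I>,
        input: &'input str,
    ) where
        T: TokenTag,
        S: LexSource<T>,
        I: LexIterator<'cache, 'input, T, S>,
    {
        let mut iter = lexer.iter(input); // borrows the cached regex tables
        while let Some(token) = iter.next() {
            println!("{:?}", token.as_str()); // each token still knows its source slice
        }
    }

The regex tables are compiled once per Lexer rather than once per input, which is the point of forcing LexIterator to be Cacheable.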
diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs
index edcf6af..b51df79 100644
--- a/crates/lex_regex/src/lib.rs
+++ b/crates/lex_regex/src/lib.rs
@@ -1,53 +1,77 @@
-use std::marker::PhantomData;
-
 use regex::{Regex, RegexSet};

-use copager_cfg::token::Token;
-use copager_cfg::TokenKind;
-use copager_lex::LexIterator;
+use copager_cfg::token::{TokenTag, Token};
+use copager_lex::{LexSource, LexIterator};
+use copager_utils::cache::Cacheable;

-struct RegexLexer<'a, 'b, T: TokenKind<'a>> {
-    // Regex
-    regex_istr: Regex,
-    regex_set: RegexSet,
-    regex_map: Vec<(Regex, T)>,
+struct RegexLexer<'cache, 'input, T: TokenTag> {
+    // regex
+    regex_istr: &'cache Regex,
+    regex_set: &'cache RegexSet,
+    regex_map: &'cache Vec<(Regex, T)>,

-    // State
-    input: &'b str,
+    // state
+    input: &'input str,
     pos: usize,
+}

-    // PhantomData
-    _phantom: PhantomData<&'a T>,
+struct RegexLexerCache<T> {
+    regex_istr: Regex,
+    regex_set: RegexSet,
+    regex_map: Vec<(Regex, T)>,
 }

-impl<'a, 'b, T: TokenKind<'a>> TryFrom<&'b str> for RegexLexer<'a, 'b, T> {
-    type Error = anyhow::Error;
+impl<'cache, 'input, T, S> Cacheable<'cache, S> for RegexLexer<'cache, 'input, T>
+where
+    T: TokenTag,
+    S: LexSource<T>,
+{
+    type Cache = RegexLexerCache<T>;

-    fn try_from(input: &'b str) -> anyhow::Result<Self> {
-        let regex_istr = Regex::new(T::ignore_str())?;
-        let regex_set = T::into_iter()
-            .map(|token| T::as_str(&token))
+    fn new(source: S) -> anyhow::Result<Self::Cache> {
+        let regex_istr = Regex::new(source.ignore_token())?;
+        let regex_set = source.iter()
+            .map(|token| token.as_str())
             .collect::<Vec<_>>();
         let regex_set = RegexSet::new(regex_set)?;
-        let regex_map = T::into_iter()
+        let regex_map = source.iter()
             .map(|token| Ok((Regex::new(token.as_str())?, token)))
             .collect::<anyhow::Result<Vec<_>>>()?;

-        Ok(RegexLexer {
+        Ok(RegexLexerCache {
             regex_istr,
             regex_set,
             regex_map,
-            input,
-            pos: 0,
-            _phantom: PhantomData,
         })
     }
+
+    fn restore(cache: &'cache Self::Cache) -> Self {
+        RegexLexer {
+            regex_istr: &cache.regex_istr,
+            regex_set: &cache.regex_set,
+            regex_map: &cache.regex_map,
+            input: "",
+            pos: 0,
+        }
+    }
 }

-impl<'a, 'b, T: TokenKind<'a> + 'a> LexIterator<'a, 'b> for RegexLexer<'a, 'b, T> {
-    type TokenKind = T;
+impl<'cache, 'input, T, S> LexIterator<'cache, 'input, T, S> for RegexLexer<'cache, 'input, T>
+where
+    T: TokenTag,
+    S: LexSource<T>,
+{
+    fn init(&self, input: &'input str) -> Self {
+        RegexLexer {
+            regex_istr: self.regex_istr,
+            regex_set: self.regex_set,
+            regex_map: self.regex_map,
+            input: input,
+            pos: 0,
+        }
+    }

-    fn next(&mut self) -> Option<Token<'a, 'b, T>> {
+    fn next(&mut self) -> Option<Token<'input, T>> {
         // Skip Spaces
         let remain = match self.regex_istr.find(&self.input[self.pos..]) {
             Some(acc_s) => {
                 self.pos += acc_s.len();
                 &self.input[self.pos..]
             }
             None => &self.input[self.pos..]
         };

         // Find the token
         let mut matches = self
             .regex_set
             .matches(remain)
             .into_iter()
             .map(|idx| &self.regex_map[idx])
             .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str()))
             .collect::<Vec<_>>();
         matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len()));

         // Update myself
         let (token, acc_s) = matches.first()?;
         let range = (self.pos, self.pos + acc_s.len());
         self.pos += acc_s.len();

         Some(Token::new(*token, &self.input, range))
     }
 }

From bd5539fc0dfa75f8167f2c31ffb3e564663b7628 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Wed, 28 Aug 2024 20:10:47 +0900
Subject: [PATCH 06/55] [change] Remove TokenKind & reimplement it as
 token/TokenTag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/cfg/src/lib.rs | 18 +-
 crates/cfg/src/rule.rs | 451 ++++++++++++++++++++--------------------
 crates/cfg/src/token.rs | 27 ++-
 3 files changed, 243 insertions(+), 253 deletions(-)

diff --git a/crates/cfg/src/lib.rs b/crates/cfg/src/lib.rs
index 88dec6b..892e40a 100644
--- a/crates/cfg/src/lib.rs
+++ b/crates/cfg/src/lib.rs
@@ -3,27 +3,19 @@ pub mod token;

 use std::hash::Hash;

+use token::TokenTag;
 use rule::{Rule, RuleSet};

-pub trait TokenKind<'a>
-where
-    Self: Copy + Clone + Hash + Eq,
-{
-    fn 
as_str(&self) -> &'a str; - fn ignore_str() -> &'a str; - fn into_iter() -> impl Iterator; -} - -pub trait RuleKind<'a> +pub trait RuleKind where Self: Clone + Hash + Eq, + T: TokenTag, { - type TokenKind: crate::TokenKind<'a>; - fn into_rules(&self) -> Vec>; + fn into_rules(&self) -> Vec>; fn into_iter() -> impl Iterator; - fn into_ruleset() -> RuleSet<'a, Self::TokenKind> { + fn into_ruleset() -> RuleSet { Self::into_iter() .enumerate() .flat_map(|(idx, elem)| { diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 8724ca7..df65cea 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -1,24 +1,23 @@ use std::collections::HashMap; use std::hash::Hash; -use std::marker::PhantomData; -use crate::TokenKind; +use crate::token::TokenTag; #[derive(PartialEq, Eq, Hash, Debug)] -pub struct Rule<'a, T: TokenKind<'a>> { +pub struct Rule { pub id: usize, - pub lhs: RuleElem<'a, T>, - pub rhs: Vec>, + pub lhs: RuleElem, + pub rhs: Vec>, } -impl<'a, T: TokenKind<'a>> From<(RuleElem<'a, T>, Vec>)> for Rule<'a, T> { - fn from((lhs, rhs): (RuleElem<'a, T>, Vec>)) -> Self { +impl From<(RuleElem, Vec>)> for Rule { + fn from((lhs, rhs): (RuleElem, Vec>)) -> Self { Rule { id: 0, lhs, rhs } } } -impl<'a, T: TokenKind<'a>> Rule<'a, T> { - pub fn nonterms<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { +impl Rule { + pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem> { let mut l_nonterms = vec![&self.lhs]; let r_nonterms: Vec<&RuleElem> = self .rhs @@ -29,7 +28,7 @@ impl<'a, T: TokenKind<'a>> Rule<'a, T> { l_nonterms } - pub fn terms<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { + pub fn terms<'a>(&'a self) -> Vec<&'a RuleElem> { self.rhs .iter() .filter(|token| matches!(token, RuleElem::::Term(_))) @@ -38,13 +37,13 @@ impl<'a, T: TokenKind<'a>> Rule<'a, T> { } #[derive(Debug, Eq)] -pub enum RuleElem<'a, T: TokenKind<'a>> { +pub enum RuleElem { NonTerm(String), - Term((T, PhantomData<&'a ()>)), + Term(T), EOF, } -impl<'a, T: TokenKind<'a>> Hash for RuleElem<'a, T> { +impl Hash for RuleElem { fn hash(&self, state: &mut H) { match self { RuleElem::NonTerm(s) => s.hash(state), @@ -54,7 +53,7 @@ impl<'a, T: TokenKind<'a>> Hash for RuleElem<'a, T> { } } -impl<'a, T: TokenKind<'a>> PartialEq for RuleElem<'a, T> { +impl PartialEq for RuleElem { fn eq(&self, other: &Self) -> bool { match (self, other) { (RuleElem::NonTerm(s1), RuleElem::NonTerm(s2)) => s1 == s2, @@ -65,26 +64,26 @@ impl<'a, T: TokenKind<'a>> PartialEq for RuleElem<'a, T> { } } -impl<'a, T: TokenKind<'a>> RuleElem<'a, T> { - pub fn new_nonterm>(t: U) -> RuleElem<'a, T> { +impl RuleElem { + pub fn new_nonterm>(t: U) -> RuleElem { RuleElem::NonTerm(t.into()) } - pub fn new_term(t: T) -> RuleElem<'a, T> { - RuleElem::Term((t, PhantomData)) + pub fn new_term(t: T) -> RuleElem { + RuleElem::Term(t) } } #[derive(Debug)] -pub struct RuleSet<'a, T: TokenKind<'a>> { +pub struct RuleSet { pub top: String, - pub rules: Vec>, + pub rules: Vec>, } -impl<'a, T: TokenKind<'a>> FromIterator> for RuleSet<'a, T> { +impl FromIterator> for RuleSet { fn from_iter(rules: I) -> Self where - I: IntoIterator>, + I: IntoIterator>, { let rules = rules.into_iter().collect::>(); let top = match &rules[0].lhs { @@ -95,23 +94,23 @@ impl<'a, T: TokenKind<'a>> FromIterator> for RuleSet<'a, T> { } } -impl<'a, T: TokenKind<'a>> RuleSet<'a, T> { - pub fn nonterms<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { +impl RuleSet { + pub fn nonterms<'a>(&'a self) -> Vec<&'a RuleElem> { self.rules.iter().flat_map(|rule| rule.nonterms()).collect() } - pub fn terms<'b>(&'b self) 
-> Vec<&'b RuleElem<'a, T>> { + pub fn terms<'a>(&'a self) -> Vec<&'a RuleElem> { self.rules.iter().flat_map(|rule| rule.terms()).collect() } - pub fn find_rule<'b>(&'b self, target: &RuleElem<'a, T>) -> Vec<&'b Rule<'a, T>> { + pub fn find_rule<'a>(&'a self, target: &RuleElem) -> Vec<&'a Rule> { self.rules .iter() .filter(|rule| &rule.lhs == target) .collect() } - pub fn first_set<'b>(&'b self) -> HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>> { + pub fn first_set<'a>(&'a self) -> HashMap<&'a RuleElem, Vec<&'a RuleElem>> { // 1. Calc a null set let nulls_set = self.nulls_set(); @@ -164,7 +163,7 @@ impl<'a, T: TokenKind<'a>> RuleSet<'a, T> { first_set } - fn nulls_set<'b>(&'b self) -> Vec<&'b RuleElem<'a, T>> { + fn nulls_set<'a>(&'a self) -> Vec<&'a RuleElem> { // 1. Find null rules let mut nulls_set: Vec<&RuleElem> = self .rules @@ -193,201 +192,201 @@ impl<'a, T: TokenKind<'a>> RuleSet<'a, T> { } } -#[cfg(test)] -mod test { - use std::collections::HashMap; - - use crate::{TokenKind, RuleKind}; - - use super::{Rule, RuleElem}; - - #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] - enum TestToken { - Num, - Plus, - Minus, - Mul, - Div, - BracketA, - BracketB, - } - - impl TokenKind<'_> for TestToken { - fn as_str(&self) -> &'static str { - match self { - TestToken::Num => r"^[1-9][0-9]*", - TestToken::Plus => r"^\+", - TestToken::Minus => r"^-", - TestToken::Mul => r"^\*", - TestToken::Div => r"^/", - TestToken::BracketA => r"^\(", - TestToken::BracketB => r"^\)", - } - } - - fn ignore_str() -> &'static str { - r"^[ \t\n]+" - } - - fn into_iter() -> impl Iterator { - vec![ - TestToken::Num, - TestToken::Plus, - TestToken::Minus, - TestToken::Mul, - TestToken::Div, - TestToken::BracketA, - TestToken::BracketB, - ] - .into_iter() - } - } - - #[derive(Debug, Clone, Hash, PartialEq, Eq)] - enum TestRule { - ExprPlus, - ExprMinus, - Expr2Term, - TermMul, - TermDiv, - Term2Fact, - Fact2Expr, - Fact2Num, - } - - impl<'a> RuleKind<'a> for TestRule { - type TokenKind = TestToken; - - fn into_iter() -> impl Iterator { - Box::new( - vec![ - TestRule::ExprPlus, - TestRule::ExprMinus, - TestRule::Expr2Term, - TestRule::TermMul, - TestRule::TermDiv, - TestRule::Term2Fact, - TestRule::Fact2Expr, - TestRule::Fact2Num, - ] - .into_iter(), - ) - } - - fn into_rules(&self) -> Vec> { - let expr_plus = Rule::from(( - RuleElem::new_nonterm("expr"), - vec![ - RuleElem::new_nonterm("expr"), - RuleElem::new_term(TestToken::Plus), - RuleElem::new_nonterm("term"), - ], - )); - - let expr_minus = Rule::from(( - RuleElem::new_nonterm("expr"), - vec![ - RuleElem::new_nonterm("expr"), - RuleElem::new_term(TestToken::Minus), - RuleElem::new_nonterm("term"), - ], - )); - - let expr_2_term = Rule::::from(( - RuleElem::new_nonterm("expr"), - vec![RuleElem::new_nonterm("term")], - )); - - let term_mul = Rule::from(( - RuleElem::new_nonterm("term"), - vec![ - RuleElem::new_nonterm("term"), - RuleElem::new_term(TestToken::Mul), - RuleElem::new_nonterm("fact"), - ], - )); - - let term_div = Rule::from(( - RuleElem::new_nonterm("term"), - vec![ - RuleElem::new_nonterm("term"), - RuleElem::new_term(TestToken::Div), - RuleElem::new_nonterm("fact"), - ], - )); - - let term_2_fact = Rule::::from(( - RuleElem::new_nonterm("term"), - vec![RuleElem::new_nonterm("fact")], - )); - - let fact_2_expr = Rule::from(( - RuleElem::new_nonterm("fact"), - vec![ - RuleElem::new_term(TestToken::BracketA), - RuleElem::new_nonterm("expr"), - RuleElem::new_term(TestToken::BracketB), - ], - )); - - let fact_2_num = 
Rule::from((RuleElem::new_nonterm("fact"), vec![])); - - match self { - TestRule::ExprPlus => vec![expr_plus], - TestRule::ExprMinus => vec![expr_minus], - TestRule::Expr2Term => vec![expr_2_term], - TestRule::TermMul => vec![term_mul], - TestRule::TermDiv => vec![term_div], - TestRule::Term2Fact => vec![term_2_fact], - TestRule::Fact2Expr => vec![fact_2_expr], - TestRule::Fact2Num => vec![fact_2_num], - } - } - } - - fn check>( - first_set: &HashMap<&RuleElem, Vec<&RuleElem>>, - nonterm: T, - exp_terms: Vec, - ) { - let nonterms = RuleElem::::new_nonterm(nonterm); - let exp_terms: Vec> = exp_terms - .into_iter() - .map(|term| RuleElem::new_term(term)) - .collect(); - assert!(first_set.get(&nonterms).unwrap().len() == exp_terms.len()); - - let result = first_set - .get(&nonterms) - .unwrap() - .into_iter() - .zip(exp_terms.into_iter()) - .any(|(a, b)| a == &&b); - assert!(result); - } - - #[test] - fn first_set() { - let ruleset = ::into_ruleset(); - let first_set = ruleset.first_set(); - - check( - &first_set, - "expr", - vec![ - TestToken::Plus, - TestToken::Minus, - TestToken::Mul, - TestToken::Div, - TestToken::BracketA, - ], - ); - check( - &first_set, - "term", - vec![TestToken::Mul, TestToken::Div, TestToken::BracketA], - ); - check(&first_set, "fact", vec![TestToken::BracketA]); - } -} - +// #[cfg(test)] +// mod test { +// use std::collections::HashMap; + +// use crate::token::TokenTag; +// use crate::RuleKind; + +// use super::{Rule, RuleElem}; + +// #[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +// enum TestToken { +// Num, +// Plus, +// Minus, +// Mul, +// Div, +// BracketA, +// BracketB, +// } + +// impl TokenKind<'_> for TestToken { +// fn as_str(&self) -> &'static str { +// match self { +// TestToken::Num => r"^[1-9][0-9]*", +// TestToken::Plus => r"^\+", +// TestToken::Minus => r"^-", +// TestToken::Mul => r"^\*", +// TestToken::Div => r"^/", +// TestToken::BracketA => r"^\(", +// TestToken::BracketB => r"^\)", +// } +// } + +// fn ignore_str() -> &'static str { +// r"^[ \t\n]+" +// } + +// fn into_iter() -> impl Iterator { +// vec![ +// TestToken::Num, +// TestToken::Plus, +// TestToken::Minus, +// TestToken::Mul, +// TestToken::Div, +// TestToken::BracketA, +// TestToken::BracketB, +// ] +// .into_iter() +// } +// } + +// #[derive(Debug, Clone, Hash, PartialEq, Eq)] +// enum TestRule { +// ExprPlus, +// ExprMinus, +// Expr2Term, +// TermMul, +// TermDiv, +// Term2Fact, +// Fact2Expr, +// Fact2Num, +// } + +// impl<'a> RuleKind<'a> for TestRule { +// type TokenKind = TestToken; + +// fn into_iter() -> impl Iterator { +// Box::new( +// vec![ +// TestRule::ExprPlus, +// TestRule::ExprMinus, +// TestRule::Expr2Term, +// TestRule::TermMul, +// TestRule::TermDiv, +// TestRule::Term2Fact, +// TestRule::Fact2Expr, +// TestRule::Fact2Num, +// ] +// .into_iter(), +// ) +// } + +// fn into_rules(&self) -> Vec> { +// let expr_plus = Rule::from(( +// RuleElem::new_nonterm("expr"), +// vec![ +// RuleElem::new_nonterm("expr"), +// RuleElem::new_term(TestToken::Plus), +// RuleElem::new_nonterm("term"), +// ], +// )); + +// let expr_minus = Rule::from(( +// RuleElem::new_nonterm("expr"), +// vec![ +// RuleElem::new_nonterm("expr"), +// RuleElem::new_term(TestToken::Minus), +// RuleElem::new_nonterm("term"), +// ], +// )); + +// let expr_2_term = Rule::::from(( +// RuleElem::new_nonterm("expr"), +// vec![RuleElem::new_nonterm("term")], +// )); + +// let term_mul = Rule::from(( +// RuleElem::new_nonterm("term"), +// vec![ +// RuleElem::new_nonterm("term"), +// 
RuleElem::new_term(TestToken::Mul), +// RuleElem::new_nonterm("fact"), +// ], +// )); + +// let term_div = Rule::from(( +// RuleElem::new_nonterm("term"), +// vec![ +// RuleElem::new_nonterm("term"), +// RuleElem::new_term(TestToken::Div), +// RuleElem::new_nonterm("fact"), +// ], +// )); + +// let term_2_fact = Rule::::from(( +// RuleElem::new_nonterm("term"), +// vec![RuleElem::new_nonterm("fact")], +// )); + +// let fact_2_expr = Rule::from(( +// RuleElem::new_nonterm("fact"), +// vec![ +// RuleElem::new_term(TestToken::BracketA), +// RuleElem::new_nonterm("expr"), +// RuleElem::new_term(TestToken::BracketB), +// ], +// )); + +// let fact_2_num = Rule::from((RuleElem::new_nonterm("fact"), vec![])); + +// match self { +// TestRule::ExprPlus => vec![expr_plus], +// TestRule::ExprMinus => vec![expr_minus], +// TestRule::Expr2Term => vec![expr_2_term], +// TestRule::TermMul => vec![term_mul], +// TestRule::TermDiv => vec![term_div], +// TestRule::Term2Fact => vec![term_2_fact], +// TestRule::Fact2Expr => vec![fact_2_expr], +// TestRule::Fact2Num => vec![fact_2_num], +// } +// } +// } + +// fn check>( +// first_set: &HashMap<&RuleElem, Vec<&RuleElem>>, +// nonterm: T, +// exp_terms: Vec, +// ) { +// let nonterms = RuleElem::::new_nonterm(nonterm); +// let exp_terms: Vec> = exp_terms +// .into_iter() +// .map(|term| RuleElem::new_term(term)) +// .collect(); +// assert!(first_set.get(&nonterms).unwrap().len() == exp_terms.len()); + +// let result = first_set +// .get(&nonterms) +// .unwrap() +// .into_iter() +// .zip(exp_terms.into_iter()) +// .any(|(a, b)| a == &&b); +// assert!(result); +// } + +// #[test] +// fn first_set() { +// let ruleset = ::into_ruleset(); +// let first_set = ruleset.first_set(); + +// check( +// &first_set, +// "expr", +// vec![ +// TestToken::Plus, +// TestToken::Minus, +// TestToken::Mul, +// TestToken::Div, +// TestToken::BracketA, +// ], +// ); +// check( +// &first_set, +// "term", +// vec![TestToken::Mul, TestToken::Div, TestToken::BracketA], +// ); +// check(&first_set, "fact", vec![TestToken::BracketA]); +// } +// } diff --git a/crates/cfg/src/token.rs b/crates/cfg/src/token.rs index 8e0b4be..a73cde2 100644 --- a/crates/cfg/src/token.rs +++ b/crates/cfg/src/token.rs @@ -1,26 +1,25 @@ -use std::marker::PhantomData; +use std::hash::Hash; -use crate::TokenKind; +pub trait TokenTag +where + Self: Copy + Clone + Hash + Eq, +{ + fn as_str<'a, 'b>(&'a self) -> &'b str; +} #[derive(Debug, Copy, Clone)] -pub struct Token<'a, 'b, T: TokenKind<'a>> { +pub struct Token<'input, T: TokenTag> { pub kind: T, - pub src: &'b str, + pub src: &'input str, pub range: (usize, usize), - _phantom: PhantomData<&'a ()>, } -impl<'a, 'b, T: TokenKind<'a>> Token<'a, 'b, T> { - pub fn new(kind: T, src: &'b str, range: (usize, usize)) -> Self { - Token { - kind, - src, - range, - _phantom: PhantomData, - } +impl<'input, T: TokenTag> Token<'input, T> { + pub fn new(kind: T, src: &'input str, range: (usize, usize)) -> Self { - Token { kind, src, range } } - pub fn as_str(&self) -> &'b str { + pub fn as_str(&self) -> &'input str { let (l, r) = self.range; &self.src[l..r] } From 95733e6df644dc049e158b72ef14b3d7666dc79d Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Wed, 28 Aug 2024 20:21:05 +0900 Subject: [PATCH 07/55] [fix] Fix changes missed in the TokenKind → TokenTag rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock
| 11 ++++++++ Cargo.toml | 2 ++ crates/core/src/error.rs | 5 ++-- crates/ir/src/lib.rs | 21 ++++++++------ crates/ir_sexp/src/lib.rs | 58 +++++++++++++++++++-------------------- 5 files changed, 55 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8f120d2..a00763b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,6 +31,7 @@ dependencies = [ "copager_lex_regex", "copager_parse", "copager_parse_lr1", + "copager_utils", "serde", "serde_json", ] @@ -90,6 +91,7 @@ version = "0.1.1" dependencies = [ "anyhow", "copager_cfg", + "copager_utils", "thiserror", ] @@ -100,6 +102,7 @@ dependencies = [ "anyhow", "copager_cfg", "copager_lex", + "copager_utils", "regex", "regex-macro", "thiserror", @@ -123,6 +126,14 @@ dependencies = [ "thiserror", ] +[[package]] +name = "copager_utils" +version = "0.1.1" +dependencies = [ + "anyhow", + "thiserror", +] + [[package]] name = "either" version = "1.13.0" diff --git a/Cargo.toml b/Cargo.toml index 9f6d568..e2a385a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ copager_parse = { path = "./crates/parse" } copager_parse_lr1 = { path = "./crates/parse_lr1" } copager_ir = { path = "./crates/ir" } copager_ir_sexp = { path = "./crates/ir_sexp" } +copager_utils = { path = "./crates/utils" } [dev-dependencies] anyhow = { workspace = true } @@ -34,6 +35,7 @@ members = [ "./crates/parse_lr1", "./crates/ir", "./crates/ir_sexp", + "./crates/utils", ] exclude = [] diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index 16a3539..4c5f915 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -4,8 +4,7 @@ use std::fmt::Display; use thiserror::Error; -use copager_cfg::token::Token; -use copager_cfg::TokenKind; +use copager_cfg::token::{TokenTag, Token}; #[derive(Debug, Error)] pub struct ParseError { @@ -32,7 +31,7 @@ impl ParseError { } } - pub fn with<'a, T: TokenKind<'a>>(self, token: Token<'a, '_, T>) -> ParseError { + pub fn with<'input, T: TokenTag>(self, token: Token<'input, T>) -> ParseError { let mut sum = 0; let (mut rows, mut cols) = (1, 1); for c in token.src.chars() { diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 7c3b4b4..31dc628 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -1,17 +1,20 @@ -use copager_cfg::{RuleKind, TokenKind}; +use copager_cfg::token::TokenTag; +use copager_cfg::RuleKind; -pub trait IR<'a, T, R> +pub trait IR where - T: TokenKind<'a>, - R: RuleKind<'a, TokenKind = T>, + T: TokenTag, + R: RuleKind, { - type Builder: IRBuilder<'a>; + type Builder: IRBuilder; } -pub trait IRBuilder<'a> { - type TokenKind: TokenKind<'a>; - type RuleKind: RuleKind<'a, TokenKind = Self::TokenKind>; - type Output: IR<'a, Self::TokenKind, Self::RuleKind>; +pub trait IRBuilder +where + T: TokenTag, + R: RuleKind, +{ + type Output: IR; fn new() -> Self; fn build(self) -> anyhow::Result; diff --git a/crates/ir_sexp/src/lib.rs b/crates/ir_sexp/src/lib.rs index c2dc50d..7fe0d6c 100644 --- a/crates/ir_sexp/src/lib.rs +++ b/crates/ir_sexp/src/lib.rs @@ -1,26 +1,26 @@ use std::fmt::{Debug, Display}; -use copager_cfg::token::Token; -use copager_cfg::{RuleKind, TokenKind}; +use copager_cfg::token::{TokenTag, Token}; +use copager_cfg::RuleKind; use copager_ir::{IR, IRBuilder}; #[derive(Debug)] -pub enum SExp<'a, 'b, T, S> +pub enum SExp<'input, T, S> where - T: TokenKind<'a> + 'a, - S: RuleKind<'a, TokenKind = T>, + T: TokenTag, + S: RuleKind, { List { tag: S, - elems: Vec>, + elems: Vec>, }, - Atom(Token<'a, 'b, T>), + Atom(Token<'input, T>), } -impl<'a, T, S> Display for SExp<'a, 
'_, T, S> where - T: TokenKind<'a> + 'a, - S: RuleKind<'a, TokenKind = T> + Debug, + T: TokenTag, + S: RuleKind + Debug, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -36,37 +36,35 @@ where } } -impl<'a, 'b, T, R> IR<'a, T, R> for SExp<'a, 'b, T, R> +impl<'input, T, R> IR for SExp<'input, T, R> where - T: TokenKind<'a> + 'a, - R: RuleKind<'a, TokenKind = T>, + T: TokenTag, + R: RuleKind, { - type Builder = SExpBuilder<'a, 'b, T, R>; + type Builder = SExpBuilder<'input, T, R>; } #[derive(Debug)] -pub struct SExpBuilder<'a, 'b, T, R> +pub struct SExpBuilder<'input, T, R> where - T: TokenKind<'a> + 'a, - R: RuleKind<'a, TokenKind = T>, + T: TokenTag, + R: RuleKind, { - stack: Vec>, + stack: Vec>, } -impl <'a, 'b, T, R> IRBuilder<'a> for SExpBuilder<'a, 'b, T, R> +impl <'input, T, R> IRBuilder for SExpBuilder<'input, T, R> where - T: TokenKind<'a> + 'a, - R: RuleKind<'a, TokenKind = T>, + T: TokenTag, + R: RuleKind, { - type TokenKind = T; - type RuleKind = R; - type Output = SExp<'a, 'b, T, R>; + type Output = SExp<'input, T, R>; - fn new() -> SExpBuilder<'a, 'b, T, R> { + fn new() -> SExpBuilder<'input, T, R> { SExpBuilder { stack: vec![] } } - fn build(mut self) -> anyhow::Result> { + fn build(mut self) -> anyhow::Result> { if self.stack.len() == 1 { Ok(self.stack.pop().unwrap()) } else { @@ -75,12 +73,12 @@ where } } -impl<'a, 'b, T, R> SExpBuilder<'a, 'b, T, R> +impl<'input, T, R> SExpBuilder<'input, T, R> where - T: TokenKind<'a> + 'a, - R: RuleKind<'a, TokenKind = T>, + T: TokenTag, + R: RuleKind, { - pub fn push(&mut self, token: Token<'a, 'b, T>) { + pub fn push(&mut self, token: Token<'input, T>) { self.stack.push(SExp::Atom(token)); } From bd0d0f764214c6952b6de09035daa73c3b25f17f Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 29 Aug 2024 18:27:21 +0900 Subject: [PATCH 08/55] [update] Add Debug to TokenTag and RuleTag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/cfg/src/rule.rs | 16 +++++++++++--- crates/cfg/src/token.rs | 3 ++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index df65cea..5a61f72 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -1,9 +1,19 @@ use std::collections::HashMap; +use std::fmt::Debug; use std::hash::Hash; use crate::token::TokenTag; -#[derive(PartialEq, Eq, Hash, Debug)] +pub trait RuleTag +where + Self: Debug + Copy + Clone + Hash + Eq, +{ + type TokenTag: TokenTag; + + fn as_rules<'a, 'b>(&'a self) -> Vec<&'b Rule>; +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Rule { pub id: usize, pub lhs: RuleElem, pub rhs: Vec>, } @@ -36,7 +46,7 @@ impl Rule { } } -#[derive(Debug, Eq)] +#[derive(Debug, Clone, Eq)] pub enum RuleElem { NonTerm(String), Term(T), EOF, } @@ -74,7 +84,7 @@ impl RuleElem { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct RuleSet { pub top: String, pub rules: Vec>, } diff --git a/crates/cfg/src/token.rs b/crates/cfg/src/token.rs index a73cde2..1469f80 100644 --- a/crates/cfg/src/token.rs +++ b/crates/cfg/src/token.rs @@ -1,8 +1,9 @@ +use std::fmt::Debug; use std::hash::Hash; pub trait TokenTag where - Self: Copy + Clone + Hash + Eq, + Self: Debug + Copy + Clone + Hash + Eq, { fn as_str<'a, 'b>(&'a self) -> &'b str; } From f9e1df37261cf6686fcc924b9cb70c9fa7dccdab Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 29 Aug 2024 23:56:17 +0900 Subject: [PATCH 09/55] [update] Redesign {Lex, Parse}x{Source, Iterator} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/lex/src/lib.rs | 54 +----- crates/lex_regex/src/lib.rs | 32 ++-- crates/parse/Cargo.toml | 6 +- crates/parse/src/lib.rs | 40 ++++- crates/parse_lr1/Cargo.toml | 4 + crates/parse_lr1/src/builder.rs | 166 +++++++++---------- crates/parse_lr1/src/driver.rs | 67 -------- crates/parse_lr1/src/error.rs | 8 +- crates/parse_lr1/src/lib.rs | 280 +++++++++++++++++++++----------- 9 files changed, 343 insertions(+), 314 deletions(-) delete mode 100644 crates/parse_lr1/src/driver.rs diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs index 02f8dec..f3b1cb9 100644 --- a/crates/lex/src/lib.rs +++ b/crates/lex/src/lib.rs @@ -1,59 +1,19 @@ -use std::marker::PhantomData; - use copager_cfg::token::{TokenTag, Token}; -use copager_utils::cache::Cacheable; - -pub struct Lexer<'cache, 'input, T, S, I> -where - T: TokenTag, - S: LexSource, - I: LexIterator<'cache, 'input, T, S>, -{ - cache: I::Cache, - _phantom_s: PhantomData, - _phantom_t: PhantomData<&'input T>, -} - -impl<'cache, 'input, T, S, I> Lexer<'cache, 'input, T, S, I> -where - T: TokenTag, - S: LexSource, - I: LexIterator<'cache, 'input, T, S>, -{ - pub fn new() -> anyhow::Result - where - S: Default, - { - Self::try_from(S::default()) - } - pub fn try_from(source: S) -> anyhow::Result { - Ok(Lexer { - cache: I::new(source)?, - _phantom_s: PhantomData, - _phantom_t: PhantomData, - }) - } - - pub fn iter(&'cache self, input: &'input str) -> I { - I::restore(&self.cache).init(input) - } -} +pub trait LexSource { + type Tag: TokenTag; -pub trait LexSource -where - T: TokenTag, -{ fn ignore_token(&self) -> &str; - fn iter(&self) -> impl Iterator; + fn iter(&self) -> impl Iterator; } -pub trait LexIterator<'cache, 'input, T, S> +pub trait LexIterator<'input, T> where - Self: Sized + Cacheable<'cache, S>, + Self: Sized + From, T: TokenTag, - S: LexSource, { + type From; + fn init(&self, input: &'input str) -> Self; fn next(&mut self) -> Option>; } diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index b51df79..f231c6b 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -4,29 +4,29 @@ use copager_cfg::token::{TokenTag, Token}; use copager_lex::{LexSource, LexIterator}; use copager_utils::cache::Cacheable; -struct RegexLexer<'cache, 'input, T: TokenTag> { +struct RegexLexer<'cache, 'input, S: LexSource> { // regex regex_istr: &'cache Regex, regex_set: &'cache RegexSet, - regex_map: &'cache Vec<(Regex, T)>, + regex_map: &'cache Vec<(Regex, S::Tag)>, // state input: &'input str, pos: usize, } -struct RegexLexerCache { +struct RegexLexerCache { regex_istr: Regex, regex_set: RegexSet, - regex_map: Vec<(Regex, T)>, + regex_map: Vec<(Regex, S::Tag)>, } -impl<'cache, 'input, T, S> Cacheable<'cache, S> for RegexLexer<'cache, 'input, T> +impl<'cache, 'input, T, S> Cacheable<'cache, S> for RegexLexer<'cache, 'input, S> where T: TokenTag, - S: LexSource, + S: LexSource, { - type Cache = RegexLexerCache; + type Cache = RegexLexerCache; fn new(source: S) -> anyhow::Result { let regex_istr = Regex::new(source.ignore_token())?; let regex_set = source.iter() .map(|token| token.as_str()) @@ -56,11 +56,23 @@ where } } -impl<'cache, 'input, T, S> LexIterator<'cache, 'input, T, S> for RegexLexer<'cache, 'input, T> +impl<'cache, 'input, T, S> From<&'cache RegexLexerCache> for RegexLexer<'cache, 'input, S> where T: TokenTag, S:
LexSource, { + fn from(value: &'cache RegexLexerCache) -> Self { + Self::restore(value) + } +} + +impl<'cache, 'input, T, S> LexIterator<'input, T> for RegexLexer<'cache, 'input, S> +where + T: TokenTag, + S: LexSource + 'cache, +{ + type From = &'cache RegexLexerCache; + fn init(&self, input: &'input str) -> Self { RegexLexer { regex_istr: self.regex_istr, @@ -88,7 +100,7 @@ where .into_iter() .map(|idx| &self.regex_map[idx]) .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str())) - .collect::>(); + .collect::>(); matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); // Update myself diff --git a/crates/parse/Cargo.toml b/crates/parse/Cargo.toml index 52a0d88..89ca74d 100644 --- a/crates/parse/Cargo.toml +++ b/crates/parse/Cargo.toml @@ -4,4 +4,8 @@ version = "0.1.1" edition = "2021" [dependencies] -lr1 = { package = "copager_parse_lr1", path = "../parse_lr1" } +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_utils = { path = "../utils" } diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 80e1952..2492fd0 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -1 +1,39 @@ -pub use lr1::LR1; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, RuleSet}; +use copager_lex::LexIterator; + +pub trait ParseSource { + type Tag: RuleTag; + + fn iter(&self) -> impl Iterator; + + fn into_ruleset(&self) -> RuleSet { + let set_id_for_all = |(id, tag): (usize, Self::Tag)| { + tag.as_rules() + .into_iter() + .map(move |rule| { + let mut rule = rule.clone(); + rule.id = id; + rule + }) + }; + + self.iter() + .enumerate() + .flat_map(set_id_for_all) + .collect::>() + } +} + +pub trait ParseIterator<'input, T, R, Il> +where + Self: From, + T: TokenTag, + R: RuleTag, + Il: LexIterator<'input, T>, +{ + type From; + + fn init(&self, lexer: Il) -> Self; + fn next(&mut self) -> Option<()>; +} diff --git a/crates/parse_lr1/Cargo.toml b/crates/parse_lr1/Cargo.toml index 05b7c16..14df8fa 100644 --- a/crates/parse_lr1/Cargo.toml +++ b/crates/parse_lr1/Cargo.toml @@ -9,3 +9,7 @@ thiserror = { workspace = true } serde = { workspace = true } itertools = "0.12.1" copager_core = { path = "../core" } +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } +copager_utils = { path = "../utils" } diff --git a/crates/parse_lr1/src/builder.rs b/crates/parse_lr1/src/builder.rs index a790b72..af3507d 100644 --- a/crates/parse_lr1/src/builder.rs +++ b/crates/parse_lr1/src/builder.rs @@ -1,48 +1,44 @@ use std::collections::{HashMap, HashSet}; use std::hash::Hash; -use std::marker::PhantomData; -use serde::{Serialize, Deserialize}; use itertools::Itertools; -use copager_core::cfg::{TokenSet, Syntax, Rule, RuleElem, RuleSet}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{Rule, RuleElem, RuleSet}; +use copager_lex::LexSource; +use copager_parse::ParseSource; -#[derive(Debug, Serialize, Deserialize)] -pub(super) enum LRAction { +#[derive(Debug)] +pub enum LRAction { Shift(usize), - Reduce(S, usize, usize), // syntax, goto_id, elems_cnt + Reduce(R, usize, usize), // tag, goto_id, elems_cnt Accept, None, } -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct LR1Configure<'a, T, S> +#[derive(Debug)] +pub struct LR1Configure where - T: TokenSet<'a>, - S: Syntax<'a, TokenSet = T>, + Sl: LexSource, + Sp: ParseSource, { - // LR Tables - pub action_table: Vec>>, - pub eof_action_table: Vec>, + pub 
action_table: Vec>>, + pub eof_action_table: Vec>, pub goto_table: Vec>, - - // PhantomData - tokenset: PhantomData<&'a T>, } -impl<'a, T, S> LR1Configure<'a, T, S> +impl LR1Configure where - T: TokenSet<'a>, - S: Syntax<'a, TokenSet = T>, + Sl: LexSource, + Sp: ParseSource, { - pub fn setup() -> anyhow::Result { + pub fn new(source_l: &Sl, source_p: &Sp) -> anyhow::Result { // 1. Pre-process - let rules = S::into_iter().collect::>(); - let ruleset = S::into_ruleset(); + let ruleset = source_p.into_ruleset(); let first_set = ruleset.first_set(); // 2. Generate dummy nonterm - let top_dummy: Rule = Rule::from(( + let top_dummy: Rule = Rule::from(( RuleElem::new_nonterm("__top_dummy"), vec![RuleElem::new_nonterm(&ruleset.top)], )); @@ -68,20 +64,21 @@ where } } - let mut action_table: Vec>> = Vec::with_capacity(dfa.0.len()); - let mut eof_action_table: Vec> = Vec::with_capacity(dfa.0.len()); + let mut action_table: Vec>> = Vec::with_capacity(dfa.0.len()); + let mut eof_action_table: Vec> = Vec::with_capacity(dfa.0.len()); let mut goto_table: Vec> = Vec::with_capacity(dfa.0.len()); for _ in 0..dfa.0.len() { action_table.push(HashMap::from_iter( - T::into_iter() + source_l.iter() .map(|token| (token, LRAction::None)) - .collect::)>>(), + .collect::)>>(), )); eof_action_table.push(LRAction::None); goto_table.push(vec![0; nonterm_table.keys().len()]); } // 5. Setup tables + let rule_tags = source_p.iter().collect::>(); for lritem_set in &dfa.0 { for (token, next) in &lritem_set.next { match &token { @@ -92,7 +89,7 @@ where } RuleElem::Term(t) => { let id = lritem_set.id as usize; - let label = action_table[id].get_mut(&t.0).unwrap(); + let label = action_table[id].get_mut(t).unwrap(); *label = LRAction::Shift(*next as usize); } _ => {} @@ -107,9 +104,9 @@ where for la_token in &item.la_tokens { if let RuleElem::Term(t) = la_token { let id = lritem_set.id as usize; - let label = action_table[id].get_mut(&t.0).unwrap(); + let label = action_table[id].get_mut(t).unwrap(); *label = LRAction::Reduce( - rules[item.rule.id as usize], + rule_tags[item.rule.id as usize], *nonterm_table.get(lhs).unwrap(), item.rule.rhs.len(), ); @@ -120,7 +117,7 @@ where LRAction::Accept } else { LRAction::Reduce( - rules[item.rule.id as usize], + rule_tags[item.rule.id as usize], *nonterm_table.get(lhs).unwrap(), item.rule.rhs.len(), ) @@ -135,23 +132,22 @@ where action_table, eof_action_table, goto_table, - tokenset: PhantomData, }) } } #[derive(Debug)] -struct LRItemDFA<'a, 'b, T: TokenSet<'a>> ( - Vec> +struct LRItemDFA<'a, T: TokenTag> ( + Vec> ); -impl<'a, 'b, T: TokenSet<'a>> LRItemDFA<'a, 'b, T> { +impl<'a, T: TokenTag> LRItemDFA<'a, T> { fn gen( - init_set: LRItemSet<'a, 'b, T>, - ruleset: &'b RuleSet<'a, T>, - first_set: &HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>>, - ) -> LRItemDFA<'a, 'b, T> { - let issue_id = |old_sets: &Vec>, set: &LRItemSet<'a, 'b, T>| { + init_set: LRItemSet<'a, T>, + ruleset: &'a RuleSet, + first_set: &HashMap<&'a RuleElem, Vec<&'a RuleElem>>, + ) -> LRItemDFA<'a, T> { + let issue_id = |old_sets: &Vec>, set: &LRItemSet<'a, T>| { if let Some(ex_set) = old_sets.iter().find(|&set0| set0.strict_eq(set)) { Err(ex_set.id) } else { @@ -187,29 +183,27 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemDFA<'a, 'b, T> { } } -#[derive(Clone, Debug)] -struct LRItemSet<'a, 'b, T: TokenSet<'a>> { +#[derive(Clone, Debug, Eq)] +struct LRItemSet<'a, T: TokenTag> { id: i32, - next: HashMap<&'b RuleElem<'a, T>, i32>, - lr_items: HashSet>, + next: HashMap<&'a RuleElem, i32>, + lr_items: HashSet>, } -impl<'a, 
'b, T: TokenSet<'a>> PartialEq for LRItemSet<'a, 'b, T> { - fn eq(&self, other: &LRItemSet<'a, 'b, T>) -> bool { +impl<'a, T: TokenTag> PartialEq for LRItemSet<'a, T> { + fn eq(&self, other: &LRItemSet<'a, T>) -> bool { self.lr_items == other.lr_items } } -impl<'a, 'b, T: TokenSet<'a>> PartialEq>> for LRItemSet<'a, 'b, T> { - fn eq(&self, other: &HashSet>) -> bool { +impl<'a, T: TokenTag> PartialEq>> for LRItemSet<'a, T> { + fn eq(&self, other: &HashSet>) -> bool { &self.lr_items == other } } -impl<'a, 'b, T: TokenSet<'a>> Eq for LRItemSet<'a, 'b, T> {} - -impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { - fn new(id: i32, lr_items: HashSet>) -> Self { +impl<'a, T: TokenTag> LRItemSet<'a, T> { + fn new(id: i32, lr_items: HashSet>) -> Self { LRItemSet { id, next: HashMap::new(), @@ -226,23 +220,23 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { .all(|item| other.lr_items.iter().any(|item_b| item_b.strict_eq(item))) } - fn expand_closure<'c>( + fn expand_closure<'b>( mut self, - ruleset: &'b RuleSet<'a, T>, - first_set: &'c HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>>, - ) -> LRItemSet<'a, 'b, T> { + ruleset: &'a RuleSet, + first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, + ) -> LRItemSet<'a, T> { let mut lr_items = self.lr_items.clone(); let mut lr_items_fetched = self.lr_items; loop { - let new_items: Vec> = lr_items_fetched + let new_items: Vec> = lr_items_fetched .iter() .flat_map(|item| item.expand_closure(ruleset, first_set)) .collect(); - let new_items = LRItem::<'_, '_, _>::unify_all(new_items); + let new_items = LRItem::<'_, _>::unify_all(new_items); let new_items = HashSet::from_iter(new_items); let bef_len = lr_items.len(); - lr_items = LRItem::<'_, '_, _>::unity_set(lr_items, new_items.clone()); + lr_items = LRItem::<'_, _>::unity_set(lr_items, new_items.clone()); let af_len = lr_items.len(); if bef_len == af_len { break; @@ -254,18 +248,18 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { self } - fn gen_next_sets<'c>( + fn gen_next_sets<'b>( &self, - ruleset: &'b RuleSet<'a, T>, - first_set: &'c HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>>, - ) -> HashMap<&'b RuleElem<'a, T>, LRItemSet<'a, 'b, T>> { - let new_items: Vec<(&'b RuleElem<'a, T>, LRItem<'a, 'b, T>)> = self + ruleset: &'a RuleSet, + first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, + ) -> HashMap<&'a RuleElem, LRItemSet<'a, T>> { + let new_items: Vec<(&'a RuleElem, LRItem<'a, T>)> = self .lr_items .iter() .filter_map(|lr_item| lr_item.next_dot()) .collect(); - let mut new_sets: HashMap<&RuleElem, HashSet>> = HashMap::new(); + let mut new_sets: HashMap<&RuleElem, HashSet>> = HashMap::new(); for (bef_token, lr_item) in new_items { if new_sets.get(&bef_token).is_none() { new_sets.insert(bef_token, HashSet::new()); @@ -273,7 +267,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { new_sets.get_mut(&bef_token).unwrap().insert(lr_item); } - let mut new_sets_expanded: HashMap<&'b RuleElem<'a, T>, LRItemSet<'_, '_, _>> = HashMap::new(); + let mut new_sets_expanded: HashMap<&'a RuleElem, LRItemSet<'_, _>> = HashMap::new(); for (ktoken, new_set) in new_sets { let new_set = LRItemSet::new(0, new_set); let new_set = new_set.expand_closure(ruleset, first_set); @@ -284,30 +278,28 @@ impl<'a, 'b, T: TokenSet<'a>> LRItemSet<'a, 'b, T> { } } -#[derive(Clone, Debug)] -struct LRItem<'a, 'b, T: TokenSet<'a>> { - rule: &'b Rule<'a, T>, +#[derive(Clone, Debug, Eq)] +struct LRItem<'a, T: TokenTag> { + rule: &'a Rule, dot_pos: usize, - la_tokens: HashSet<&'b RuleElem<'a, T>>, + 
la_tokens: HashSet<&'a RuleElem>, } -impl<'a, 'b, T: TokenSet<'a>> Hash for LRItem<'a, 'b, T> { +impl<'a, T: TokenTag> Hash for LRItem<'a, T> { fn hash(&self, state: &mut H) { self.rule.hash(state); self.dot_pos.hash(state); } } -impl<'a, 'b, T: TokenSet<'a>> PartialEq for LRItem<'a, 'b, T> { +impl<'a, T: TokenTag> PartialEq for LRItem<'a, T> { fn eq(&self, other: &Self) -> bool { self.rule == other.rule && self.dot_pos == other.dot_pos } } -impl<'a, 'b, T: TokenSet<'a>> Eq for LRItem<'a, 'b, T> {} - -impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { - fn new(rule: &'b Rule<'a, T>, la_tokens: HashSet<&'b RuleElem<'a, T>>) -> LRItem<'a, 'b, T> { +impl<'a, T: TokenTag> LRItem<'a, T> { + fn new(rule: &'a Rule, la_tokens: HashSet<&'a RuleElem>) -> LRItem<'a, T> { LRItem { rule, dot_pos: 0, @@ -321,11 +313,11 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { && self.la_tokens == other.la_tokens } - fn expand_closure<'c>( + fn expand_closure<'b>( &self, - ruleset: &'b RuleSet<'a, T>, - first_set: &'c HashMap<&'b RuleElem<'a, T>, Vec<&'b RuleElem<'a, T>>>, - ) -> HashSet> { + ruleset: &'a RuleSet, + first_set: &'b HashMap<&'a RuleElem, Vec<&'a RuleElem>>, + ) -> HashSet> { let af_la_tokens = if self.dot_pos + 1 < self.rule.rhs.len() { HashSet::from_iter( first_set @@ -343,7 +335,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { ruleset .find_rule(&self.rule.rhs[self.dot_pos]) .into_iter() - .map(|rule| LRItem::<'_, '_, _>::new(rule, af_la_tokens.clone())) + .map(|rule| LRItem::<'_, _>::new(rule, af_la_tokens.clone())) .collect() } else { HashSet::new() @@ -351,7 +343,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { } #[allow(clippy::int_plus_one)] - fn next_dot(&self) -> Option<(&'b RuleElem<'a, T>, LRItem<'a, 'b, T>)> { + fn next_dot(&self) -> Option<(&'a RuleElem, LRItem<'a, T>)> { if self.dot_pos + 1 <= self.rule.rhs.len() { let bef_token = &self.rule.rhs[self.dot_pos]; let item = LRItem { @@ -365,7 +357,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { } } - fn unify(&mut self, other: LRItem<'a, 'b, T>) { + fn unify(&mut self, other: LRItem<'a, T>) { if self != &other { return; } @@ -376,7 +368,7 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { }); } - fn unify_all(mut items: Vec>) -> Vec> { + fn unify_all(mut items: Vec>) -> Vec> { for idx in (0..items.len()).permutations(2) { let (a_idx, b_idx) = (idx[0], idx[1]); let tmp = items[b_idx].clone(); @@ -386,9 +378,9 @@ impl<'a, 'b, T: TokenSet<'a>> LRItem<'a, 'b, T> { } fn unity_set( - items_a: HashSet>, - items_b: HashSet>, - ) -> HashSet> { + items_a: HashSet>, + items_b: HashSet>, + ) -> HashSet> { let mut items_a = Vec::from_iter(items_a); let items_b = Vec::from_iter(items_b); items_a.extend(items_b); diff --git a/crates/parse_lr1/src/driver.rs b/crates/parse_lr1/src/driver.rs deleted file mode 100644 index 9c7049a..0000000 --- a/crates/parse_lr1/src/driver.rs +++ /dev/null @@ -1,67 +0,0 @@ -use copager_core::cfg::{TokenSet, Syntax}; -use copager_core::lex::Token; -use copager_core::parse::{SExp, SExpBuilder}; - -use crate::error::ParseError; -use crate::builder::{LRAction, LR1Configure}; - -pub(super) struct LR1Driver<'a, 'b, T, S> (&'b LR1Configure<'a, T, S>) -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>; - -impl<'a, 'b, T, S> LR1Driver<'a, 'b, T, S> -where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, -{ - pub fn new(configure: &'b LR1Configure<'a, T, S>) -> LR1Driver<'a, 'b, T, S> { - LR1Driver(configure) - } - - pub fn run<'c>( - &self, - lexer: &mut impl Iterator>, - ) -> anyhow::Result> 
{ - let mut stack = vec![0]; - let mut builder = SExpBuilder::new(); - loop { - let input = lexer.next(); - loop { - let top = stack[stack.len() - 1]; - let action = match input { - Some(token) => ( - self.0.action_table[top].get(&token.kind).unwrap(), - Some(token), - ), - None => ( - &self.0.eof_action_table[top], - None - ), - }; - match action { - (LRAction::Shift(new_state), Some(token)) => { - stack.push(*new_state); - builder.push(token); - break; - } - (LRAction::Reduce(tag, goto, elems_cnt), _) => { - stack.truncate(stack.len() - elems_cnt); - stack.push(self.0.goto_table[stack[stack.len() - 1]][*goto]); - builder.wrap(*tag, *elems_cnt); - } - (LRAction::Accept, _) => { - return builder.build(); - } - (LRAction::None, Some(token)) => { - return Err(ParseError::new_unexpected_token(token).into()); - } - (LRAction::None, None) => { - return Err(ParseError::UnexpectedEOF.into()); - } - _ => unreachable!(), - } - } - } - } -} diff --git a/crates/parse_lr1/src/error.rs b/crates/parse_lr1/src/error.rs index f0faa25..4cbb467 100644 --- a/crates/parse_lr1/src/error.rs +++ b/crates/parse_lr1/src/error.rs @@ -1,8 +1,7 @@ use thiserror::Error; use copager_core::error::ParseError as SuperParseError; -use copager_core::cfg::TokenSet; -use copager_core::lex::Token; +use copager_cfg::token::{TokenTag, Token}; #[derive(Debug, Error)] pub enum ParseError { @@ -15,10 +14,7 @@ pub enum ParseError { } impl ParseError { - pub fn new_unexpected_token<'a, T>(expected: Token<'a, '_, T>) -> SuperParseError - where - T: TokenSet<'a>, - { + pub fn new_unexpected_token(expected: Token) -> SuperParseError { let err = ParseError::UnexpectedToken { actual: format!("{:?}", expected.kind), }; diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index ea43c1b..b4668fa 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -1,120 +1,210 @@ mod error; mod builder; -mod driver; -use serde::{Serialize, Deserialize}; +use std::marker::PhantomData; -use copager_core::cfg::{TokenSet, Syntax}; -use copager_core::lex::Token; -use copager_core::parse::{ParserImpl, SExp}; +use copager_lex::{LexSource, LexIterator}; +use copager_parse::{ParseSource, ParseIterator}; +use copager_utils::cache::Cacheable; -use builder::LR1Configure; -use driver::LR1Driver; +use builder::{LR1Configure, LRAction}; +use error::ParseError; -#[derive(Debug, Serialize, Deserialize)] -pub struct LR1<'a, T, S> (LR1Configure<'a, T, S>) +pub struct LR1<'cache, 'input, Sl, Il, Sp> where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>; + Sl: LexSource, + Il: LexIterator<'input, Sl::Tag>, + Sp: ParseSource, +{ + // LR-Table + tables: &'cache LR1Configure, + + // States + lexer: Option, + stack: Option>, -impl<'a, T, S> ParserImpl<'a> for LR1<'a, T, S> + // Phantom Data + _phantom: PhantomData<&'input ()>, +} + +impl<'cache, 'input, Sl, Il, Sp> Cacheable<'cache, (Sl, Sp)> for LR1<'cache, 'input, Sl, Il, Sp> where - T: TokenSet<'a> + 'a, - S: Syntax<'a, TokenSet = T>, + Sl: LexSource, + Il: LexIterator<'input, Sl::Tag>, + Sp: ParseSource, { - type TokenSet = T; - type Syntax = S; + type Cache = LR1Configure; - fn setup() -> anyhow::Result { - Ok(LR1(LR1Configure::setup()?)) + fn new((source_l, source_p): (Sl, Sp)) -> anyhow::Result { + Ok(LR1Configure::new(&source_l, &source_p)?) 
} - fn parse<'b>( - &self, - mut lexer: impl Iterator>, - ) -> anyhow::Result> { - LR1Driver::new(&self.0).run(&mut lexer) + fn restore(tables: &'cache Self::Cache) -> Self { + LR1 { + tables, + lexer: None, + stack: None, + _phantom: PhantomData, + } } } -#[cfg(test)] -mod test { - use copager_core::cfg::{TokenSet, Syntax, Rule, RuleElem}; - use copager_core::Parser; - - use super::LR1; - - #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] - enum TestTokenSet { - #[token(regex = r"\+")] - Plus, - #[token(regex = r"-")] - Minus, - #[token(regex = r"\*")] - Mul, - #[token(regex = r"/")] - Div, - #[token(regex = r"\(")] - BracketL, - #[token(regex = r"\)")] - BracketR, - #[token(regex = r"[1-9][0-9]*")] - Num, - #[token(regex = r"[ \t\n]+", ignored)] - _Whitespace, +impl<'cache, 'input, Sl, Il, Sp> From<&'cache LR1Configure> for LR1<'cache, 'input, Sl, Il, Sp> +where + Sl: LexSource, + Il: LexIterator<'input, Sl::Tag>, + Sp: ParseSource, +{ + fn from(tables: &'cache LR1Configure) -> Self { + Self::restore(tables) } +} - #[derive(Debug, Clone, Copy, Syntax)] - enum TestSyntax { - #[rule(" ::= Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, - } +impl<'cache, 'input, Sl, Il, Sp> ParseIterator<'input, Sl::Tag, Sp::Tag, Il> for LR1<'cache, 'input, Sl, Il, Sp> +where + Sl: LexSource, + Il: LexIterator<'input, Sl::Tag>, + Sp: ParseSource, +{ + type From = &'cache LR1Configure; - #[test] - fn input_ok() { - let inputs = vec![ - "10", - "10 + 20", - "10 - 20", - "10 * 20", - "10 / 20", - "10 + 20 * 30 - 40", - "(10)", - "((((10))))", - "10 * (20 - 30)", - "((10 + 20) * (30 / 40)) - 50", - ]; - - let parser = Parser::>::new().unwrap(); - for input in inputs { - assert!(parser.parse(input).is_ok(), "{}", input); + fn init(&self, lexer: Il) -> Self { + LR1 { + tables: &self.tables, + lexer: Some(lexer), + stack: Some(Vec::new()), + _phantom: PhantomData, } } - #[test] - fn input_err() { - let inputs = vec![ - "()", - "(10 -", - "10 +", - "*", - "10 20 + 30", - "10 + 20 * 30 / 40 (", - "(((10))", - ]; - - let parser = Parser::>::new().unwrap(); - for input in inputs { - assert!(parser.parse(input).is_err(), "{}", input); + fn next(&mut self) -> Option<()> { + let lexer = self.lexer.as_mut().unwrap(); + let stack = self.stack.as_mut().unwrap(); + loop { + let input = lexer.next(); + loop { + let top = stack[stack.len() - 1]; + let action = match input { + Some(token) => ( + self.tables.action_table[top].get(&token.kind).unwrap(), + Some(token), + ), + None => ( + &self.tables.eof_action_table[top], + None + ), + }; + match action { + (LRAction::Shift(new_state), Some(token)) => { + stack.push(*new_state); + // builder.push(token); + println!("Shift: {:?}", token); + break; + } + (LRAction::Reduce(tag, goto, elems_cnt), _) => { + stack.truncate(stack.len() - elems_cnt); + stack.push(self.tables.goto_table[stack[stack.len() - 1]][*goto]); + // builder.wrap(*tag, *elems_cnt); + println!("Reduce: {:?}", tag); + } + (LRAction::Accept, _) => { + // return builder.build(); + return Some(()); + } + (LRAction::None, Some(token)) => { + // return Err(ParseError::new_unexpected_token(token).into()); + println!("Done!"); + return None; + } + (LRAction::None, None) => { + // return Err(ParseError::UnexpectedEOF.into()); + return None; + } + _ => unreachable!(), + } + } } } } + +// #[cfg(test)] +// mod test { +// use copager_core::cfg::{TokenSet, Syntax, Rule, 
RuleElem}; +// use copager_core::Parser; + +// use super::LR1; + +// #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] +// enum TestTokenSet { +// #[token(regex = r"\+")] +// Plus, +// #[token(regex = r"-")] +// Minus, +// #[token(regex = r"\*")] +// Mul, +// #[token(regex = r"/")] +// Div, +// #[token(regex = r"\(")] +// BracketL, +// #[token(regex = r"\)")] +// BracketR, +// #[token(regex = r"[1-9][0-9]*")] +// Num, +// #[token(regex = r"[ \t\n]+", ignored)] +// _Whitespace, +// } + +// #[derive(Debug, Clone, Copy, Syntax)] +// enum TestSyntax { +// #[rule(" ::= Plus ")] +// #[rule(" ::= Minus ")] +// #[rule(" ::= ")] +// Expr, +// #[rule(" ::= Mul ")] +// #[rule(" ::= Div ")] +// #[rule(" ::= ")] +// Term, +// #[rule(" ::= BracketL BracketR")] +// #[rule(" ::= Num")] +// Num, +// } + +// #[test] +// fn input_ok() { +// let inputs = vec![ +// "10", +// "10 + 20", +// "10 - 20", +// "10 * 20", +// "10 / 20", +// "10 + 20 * 30 - 40", +// "(10)", +// "((((10))))", +// "10 * (20 - 30)", +// "((10 + 20) * (30 / 40)) - 50", +// ]; + +// let parser = Parser::>::new().unwrap(); +// for input in inputs { +// assert!(parser.parse(input).is_ok(), "{}", input); +// } +// } + +// #[test] +// fn input_err() { +// let inputs = vec![ +// "()", +// "(10 -", +// "10 +", +// "*", +// "10 20 + 30", +// "10 + 20 * 30 / 40 (", +// "(((10))", +// ]; + +// let parser = Parser::>::new().unwrap(); +// for input in inputs { +// assert!(parser.parse(input).is_err(), "{}", input); +// } +// } +// } From 83eeb21ebcb97cfccdf4e09e09f92332582b4c89 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 29 Aug 2024 23:56:52 +0900 Subject: [PATCH 10/55] [add] Add a provisional Processor struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For now, it compiles again. --- Cargo.lock | 12 ++++- crates/core/Cargo.toml | 2 + crates/core/src/lib.rs | 54 ++++++++++++--------- examples/expr.rs | 107 +++++++++++++++++++++-------------------- src/lib.rs | 8 +-- tests/derive.rs | 78 +++++++++++++++--------------- tests/serde.rs | 94 ++++++++++++++++++------------------ 7 files changed, 187 insertions(+), 168 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a00763b..bf4fe15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,6 +62,8 @@ dependencies = [ "anyhow", "copager_cfg", "copager_lex", + "copager_parse", + "copager_utils", "serde", "thiserror", ] @@ -112,7 +114,11 @@ dependencies = [ name = "copager_parse" version = "0.1.1" dependencies = [ - "copager_parse_lr1", + "anyhow", + "copager_cfg", + "copager_lex", + "copager_utils", + "thiserror", ] [[package]] @@ -120,7 +126,11 @@ name = "copager_parse_lr1" version = "0.1.1" dependencies = [ "anyhow", + "copager_cfg", "copager_core", + "copager_lex", + "copager_parse", + "copager_utils", "itertools", "serde", "thiserror", ] diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 078ce68..2c1a054 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -9,3 +9,5 @@ thiserror = { workspace = true } serde = { workspace = true } copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } +copager_utils = { path = "../utils" } diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 0c5e3bf..15a7099 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -2,36 +2,42 @@ pub mod error; use std::marker::PhantomData; -use serde::{Serialize, Deserialize}; +use copager_lex::{LexSource,
LexIterator}; +use copager_parse::{ParseSource, ParseIterator}; -use copager_lex::Lexer; - -#[derive(Debug, Serialize, Deserialize)] -pub struct Parser<'a, Algorithm> +pub struct Processor<'input, Sl, Il, Sp, Ip> where - Algorithm: ParserImpl<'a>, + Sl: LexSource, + Il: LexIterator<'input, Sl::Tag>, + Sp: ParseSource, + Ip: ParseIterator<'input, Sl::Tag, Sp::Tag, Il>, { - r#impl: Algorithm, - phantom: PhantomData<&'a ()>, + _phantom_sl: PhantomData, + _phantom_il: PhantomData, + _phantom_sp: PhantomData, + _phantom_ip: PhantomData, + _phantom_input: PhantomData<&'input ()>, } -#[allow(clippy::new_without_default)] -impl<'a, Algorithm> Parser<'a, Algorithm> +impl<'input, 'cache, Sl, Il, Sp, Ip> Processor<'input, Sl, Il, Sp, Ip> where - Algorithm: ParserImpl<'a>, + Sl: LexSource, + Il: LexIterator<'input, Sl::Tag, From = Sl>, + Sp: ParseSource, + Ip: ParseIterator<'input, Sl::Tag, Sp::Tag, Il, From = Sp>, { - pub fn new() -> anyhow::Result> { - Ok(Parser { - r#impl: Algorithm::setup()?, - phantom: PhantomData, - }) - } - - pub fn parse<'b>( - &self, - input: &'b str, - ) -> anyhow::Result> { - let lexer = Lexer::new::(input)?; - self.r#impl.parse(lexer) + pub fn process(input: &'input str) + where + Sl: Default, + Sp: Default, + { + let lexer = Il::from(Sl::default()).init(input); + let mut parser = Ip::from(Sp::default()).init(lexer); + loop { + match parser.next() { + Some(_) => {} + None => break, + } + } } } diff --git a/examples/expr.rs b/examples/expr.rs index a00812e..6671500 100644 --- a/examples/expr.rs +++ b/examples/expr.rs @@ -1,60 +1,61 @@ -use std::io::stdin; +// use std::io::stdin; -use copager::algorithm::LR1; -use copager::cfg::*; -use copager::error::ParseError; -use copager::Parser; +// use copager::algorithm::LR1; +// use copager::cfg::*; +// use copager::error::ParseError; +// use copager::Parser; -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] -enum ExprTokenSet { - #[token(regex = r"\+")] - Plus, - #[token(regex = r"-")] - Minus, - #[token(regex = r"\*")] - Mul, - #[token(regex = r"/")] - Div, - #[token(regex = r"\(")] - BracketL, - #[token(regex = r"\)")] - BracketR, - #[token(regex = r"[1-9][0-9]*")] - Num, - #[token(regex = r"[ \t\n]+", ignored)] - _Whitespace, -} +// #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] +// enum ExprTokenSet { +// #[token(regex = r"\+")] +// Plus, +// #[token(regex = r"-")] +// Minus, +// #[token(regex = r"\*")] +// Mul, +// #[token(regex = r"/")] +// Div, +// #[token(regex = r"\(")] +// BracketL, +// #[token(regex = r"\)")] +// BracketR, +// #[token(regex = r"[1-9][0-9]*")] +// Num, +// #[token(regex = r"[ \t\n]+", ignored)] +// _Whitespace, +// } -#[derive(Debug, Clone, Copy, Syntax)] -enum ExprSyntax { - #[rule(" ::= Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, -} +// #[derive(Debug, Clone, Copy, Syntax)] +// enum ExprSyntax { +// #[rule(" ::= Plus ")] +// #[rule(" ::= Minus ")] +// #[rule(" ::= ")] +// Expr, +// #[rule(" ::= Mul ")] +// #[rule(" ::= Div ")] +// #[rule(" ::= ")] +// Term, +// #[rule(" ::= BracketL BracketR")] +// #[rule(" ::= Num")] +// Num, +// } -type ExprParser<'a> = Parser::<'a, LR1<'a, ExprTokenSet, ExprSyntax>>; +// type ExprParser<'a> = Parser::<'a, LR1<'a, ExprTokenSet, ExprSyntax>>; -fn main() -> anyhow::Result<()> { - let mut input = String::new(); - stdin().read_line(&mut input)?; +// fn main() -> anyhow::Result<()> { +// let mut 
input = String::new(); +// stdin().read_line(&mut input)?; - match ExprParser::new()?.parse(&input) { - Ok(sexp) => println!("Accepted : {}", sexp), - Err(e) => { - if let Some(e) = e.downcast_ref::() { - e.pretty_print(); - } - println!("Rejected : {}", e); - } - }; +// match ExprParser::new()?.parse(&input) { +// Ok(sexp) => println!("Accepted : {}", sexp), +// Err(e) => { +// if let Some(e) = e.downcast_ref::() { +// e.pretty_print(); +// } +// println!("Rejected : {}", e); +// } +// }; - Ok(()) -} +// Ok(()) +// } +fn main() {} diff --git a/src/lib.rs b/src/lib.rs index 13023d4..f33b1ed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -pub use copager_core::*; -pub use copager_cfg as cfg; -pub use copager_lex as lex; -pub use copager_algorithm as algorithm; +// pub use copager_core::*; +// pub use copager_cfg as cfg; +// pub use copager_lex as lex; +// pub use copager_algorithm as algorithm; diff --git a/tests/derive.rs b/tests/derive.rs index 46dcfab..196cd67 100644 --- a/tests/derive.rs +++ b/tests/derive.rs @@ -1,42 +1,42 @@ -use copager::cfg::*; +// use copager::cfg::*; -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] -enum TestTokenSet { - #[token(regex = r"\+")] - Plus, - #[token(regex = r"-")] - Minus, - #[token(regex = r"\*")] - Mul, - #[token(regex = r"/")] - Div, - #[token(regex = r"\(")] - BracketL, - #[token(regex = r"\)")] - BracketR, - #[token(regex = r"[1-9][0-9]*")] - Num, - #[token(regex = r"[ \t\n]+", ignored)] - _Whitespace, -} +// #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] +// enum TestTokenSet { +// #[token(regex = r"\+")] +// Plus, +// #[token(regex = r"-")] +// Minus, +// #[token(regex = r"\*")] +// Mul, +// #[token(regex = r"/")] +// Div, +// #[token(regex = r"\(")] +// BracketL, +// #[token(regex = r"\)")] +// BracketR, +// #[token(regex = r"[1-9][0-9]*")] +// Num, +// #[token(regex = r"[ \t\n]+", ignored)] +// _Whitespace, +// } -#[derive(Debug, Clone, Copy, Syntax)] -enum TestSyntax { - #[rule(" ::= Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, -} +// #[derive(Debug, Clone, Copy, Syntax)] +// enum TestSyntax { +// #[rule(" ::= Plus ")] +// #[rule(" ::= Minus ")] +// #[rule(" ::= ")] +// Expr, +// #[rule(" ::= Mul ")] +// #[rule(" ::= Div ")] +// #[rule(" ::= ")] +// Term, +// #[rule(" ::= BracketL BracketR")] +// #[rule(" ::= Num")] +// Num, +// } -#[test] -fn check_compile() { - let _ = TestTokenSet::into_regex(&self::TestTokenSet::Plus); - let _ = TestSyntax::into_rules(&self::TestSyntax::Expr); -} +// #[test] +// fn check_compile() { +// let _ = TestTokenSet::into_regex(&self::TestTokenSet::Plus); +// let _ = TestSyntax::into_rules(&self::TestSyntax::Expr); +// } diff --git a/tests/serde.rs b/tests/serde.rs index da7ab5c..a928b5b 100644 --- a/tests/serde.rs +++ b/tests/serde.rs @@ -1,53 +1,53 @@ -use serde::{Serialize, Deserialize}; +// use serde::{Serialize, Deserialize}; -use copager::algorithm::LR1; -use copager::cfg::*; -use copager::Parser; +// use copager::algorithm::LR1; +// use copager::cfg::*; +// use copager::Parser; -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, TokenSet)] -enum TestTokenSet { - #[token(regex = r"\+")] - Plus, - #[token(regex = r"-")] - Minus, - #[token(regex = r"\*")] - Mul, - #[token(regex = r"/")] - Div, - #[token(regex = r"\(")] - BracketL, - #[token(regex = r"\)")] - BracketR, - #[token(regex = 
r"[1-9][0-9]*")] - Num, - #[token(regex = r"[ \t\n]+", ignored)] - _Whitespace, -} +// #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, TokenSet)] +// enum TestTokenSet { +// #[token(regex = r"\+")] +// Plus, +// #[token(regex = r"-")] +// Minus, +// #[token(regex = r"\*")] +// Mul, +// #[token(regex = r"/")] +// Div, +// #[token(regex = r"\(")] +// BracketL, +// #[token(regex = r"\)")] +// BracketR, +// #[token(regex = r"[1-9][0-9]*")] +// Num, +// #[token(regex = r"[ \t\n]+", ignored)] +// _Whitespace, +// } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, Syntax)] -enum TestSyntax { - #[rule(" ::= Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, -} +// #[derive(Debug, Clone, Copy, Serialize, Deserialize, Syntax)] +// enum TestSyntax { +// #[rule(" ::= Plus ")] +// #[rule(" ::= Minus ")] +// #[rule(" ::= ")] +// Expr, +// #[rule(" ::= Mul ")] +// #[rule(" ::= Div ")] +// #[rule(" ::= ")] +// Term, +// #[rule(" ::= BracketL BracketR")] +// #[rule(" ::= Num")] +// Num, +// } -type TestParser<'a> = Parser::<'a, LR1<'a, TestTokenSet, TestSyntax>>; +// type TestParser<'a> = Parser::<'a, LR1<'a, TestTokenSet, TestSyntax>>; -#[test] -fn check_serde() { - // build.rs - let parser = TestParser::new().unwrap(); - let serialized = serde_json::to_string(&parser).unwrap(); +// #[test] +// fn check_serde() { +// // build.rs +// let parser = TestParser::new().unwrap(); +// let serialized = serde_json::to_string(&parser).unwrap(); - // main.rs - let deserialized: TestParser = serde_json::from_str(&serialized).unwrap(); - deserialized.parse("10 * (20 - 30)").unwrap(); -} +// // main.rs +// let deserialized: TestParser = serde_json::from_str(&serialized).unwrap(); +// deserialized.parse("10 * (20 - 30)").unwrap(); +// } From 56c88420a001915035c34beee89f340194779179 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 29 Aug 2024 23:59:01 +0900 Subject: [PATCH 11/55] =?UTF-8?q?[change]=20core,=20parse=5Flr1=20?= =?UTF-8?q?=E3=81=AE=20serde=20=E3=81=B8=E3=81=AE=E4=BE=9D=E5=AD=98?= =?UTF-8?q?=E3=82=92=E4=B8=80=E6=97=A6=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 2 -- crates/core/Cargo.toml | 1 - crates/parse_lr1/Cargo.toml | 1 - 3 files changed, 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf4fe15..50365c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -64,7 +64,6 @@ dependencies = [ "copager_lex", "copager_parse", "copager_utils", - "serde", "thiserror", ] @@ -132,7 +131,6 @@ dependencies = [ "copager_parse", "copager_utils", "itertools", - "serde", "thiserror", ] diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 2c1a054..b379073 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } -serde = { workspace = true } copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } diff --git a/crates/parse_lr1/Cargo.toml b/crates/parse_lr1/Cargo.toml index 14df8fa..dda421c 100644 --- a/crates/parse_lr1/Cargo.toml +++ b/crates/parse_lr1/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } -serde = { workspace = true } itertools = "0.12.1" copager_core = { path = "../core" } copager_cfg = { path = 
"../cfg" } From a0da7de2d2c1de45c3b6fcafa1a9968e4778367c Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Fri, 30 Aug 2024 00:14:37 +0900 Subject: [PATCH 12/55] =?UTF-8?q?[update]=20Cachable::Cache=20=E3=81=AB=20?= =?UTF-8?q?Serialize&Deserialize=20=E3=82=92=E8=A6=81=E6=B1=82=E3=81=99?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 3 ++ crates/cfg/Cargo.toml | 1 + crates/cfg/src/rule.rs | 4 ++- crates/cfg/src/token.rs | 4 ++- crates/lex_regex/src/lib.rs | 58 +++++++++------------------------ crates/parse_lr1/Cargo.toml | 1 + crates/parse_lr1/src/builder.rs | 5 +-- crates/parse_lr1/src/lib.rs | 3 ++ crates/utils/Cargo.toml | 1 + crates/utils/src/cache.rs | 4 ++- 10 files changed, 37 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50365c4..4fa47b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,6 +41,7 @@ name = "copager_cfg" version = "0.1.1" dependencies = [ "anyhow", + "serde", "thiserror", ] @@ -131,6 +132,7 @@ dependencies = [ "copager_parse", "copager_utils", "itertools", + "serde", "thiserror", ] @@ -139,6 +141,7 @@ name = "copager_utils" version = "0.1.1" dependencies = [ "anyhow", + "serde", "thiserror", ] diff --git a/crates/cfg/Cargo.toml b/crates/cfg/Cargo.toml index b710730..f8173f4 100644 --- a/crates/cfg/Cargo.toml +++ b/crates/cfg/Cargo.toml @@ -6,3 +6,4 @@ edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true } diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 5a61f72..038a4df 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -2,11 +2,13 @@ use std::collections::HashMap; use std::fmt::Debug; use std::hash::Hash; +use serde::{Serialize, Deserialize}; + use crate::token::TokenTag; pub trait RuleTag where - Self: Debug + Copy + Clone + Hash + Eq, + Self: Debug + Copy + Clone + Hash + Eq + Serialize + for<'de> Deserialize<'de>, { type TokenTag: TokenTag; diff --git a/crates/cfg/src/token.rs b/crates/cfg/src/token.rs index 1469f80..911a674 100644 --- a/crates/cfg/src/token.rs +++ b/crates/cfg/src/token.rs @@ -1,9 +1,11 @@ use std::fmt::Debug; use std::hash::Hash; +use serde::{Serialize, Deserialize}; + pub trait TokenTag where - Self: Debug + Copy + Clone + Hash + Eq, + Self: Debug + Copy + Clone + Hash + Eq + Serialize + for<'de> Deserialize<'de>, { fn as_str<'a, 'b>(&'a self) -> &'b str; } diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index f231c6b..c8722e1 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -2,82 +2,56 @@ use regex::{Regex, RegexSet}; use copager_cfg::token::{TokenTag, Token}; use copager_lex::{LexSource, LexIterator}; -use copager_utils::cache::Cacheable; -struct RegexLexer<'cache, 'input, S: LexSource> { +#[derive(Debug)] +struct RegexLexer<'input, S: LexSource> { // regex - regex_istr: &'cache Regex, - regex_set: &'cache RegexSet, - regex_map: &'cache Vec<(Regex, S::Tag)>, + regex_istr: Regex, + regex_set: RegexSet, + regex_map: Vec<(Regex, S::Tag)>, // state input: &'input str, pos: usize, } -struct RegexLexerCache { - regex_istr: Regex, - regex_set: RegexSet, - regex_map: Vec<(Regex, S::Tag)>, -} - -impl<'cache, 'input, T, S> Cacheable<'cache, S> for RegexLexer<'cache, 'input, S> +impl<'input, T, S> From for RegexLexer<'input, S> where T: TokenTag, S: LexSource, { - type Cache = RegexLexerCache; - - fn new(source: S) -> anyhow::Result { - let 
regex_istr = Regex::new(source.ignore_token())?; + fn from(source: S) -> Self { // TODO: -> try_from + let regex_istr = Regex::new(source.ignore_token()).unwrap(); let regex_set = source.iter() .map(|token| token.as_str()) .collect::>(); - let regex_set = RegexSet::new(regex_set)?; + let regex_set = RegexSet::new(regex_set).unwrap(); let regex_map = source.iter() .map(|token| Ok((Regex::new(token.as_str())?, token))) - .collect::>>()?; + .collect::>>().unwrap(); - Ok(RegexLexerCache { + RegexLexer { regex_istr, regex_set, regex_map, - }) - } - - fn restore(cache: &'cache Self::Cache) -> Self { - RegexLexer { - regex_istr: &cache.regex_istr, - regex_set: &cache.regex_set, - regex_map: &cache.regex_map, input: "", pos: 0, } } } -impl<'cache, 'input, T, S> From<&'cache RegexLexerCache> for RegexLexer<'cache, 'input, S> +impl<'input, T, S> LexIterator<'input, T> for RegexLexer<'input, S> where T: TokenTag, S: LexSource, { - fn from(value: &'cache RegexLexerCache) -> Self { - Self::restore(value) - } -} - -impl<'cache, 'input, T, S> LexIterator<'input, T> for RegexLexer<'cache, 'input, S> -where - T: TokenTag, - S: LexSource + 'cache, -{ - type From = &'cache RegexLexerCache; + type From = S; fn init(&self, input: &'input str) -> Self { RegexLexer { - regex_istr: self.regex_istr, - regex_set: self.regex_set, - regex_map: self.regex_map, + regex_istr: self.regex_istr.clone(), + regex_set: self.regex_set.clone(), + regex_map: self.regex_map.clone(), input: input, pos: 0, } diff --git a/crates/parse_lr1/Cargo.toml b/crates/parse_lr1/Cargo.toml index dda421c..14df8fa 100644 --- a/crates/parse_lr1/Cargo.toml +++ b/crates/parse_lr1/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true } itertools = "0.12.1" copager_core = { path = "../core" } copager_cfg = { path = "../cfg" } diff --git a/crates/parse_lr1/src/builder.rs b/crates/parse_lr1/src/builder.rs index af3507d..7590fee 100644 --- a/crates/parse_lr1/src/builder.rs +++ b/crates/parse_lr1/src/builder.rs @@ -2,13 +2,14 @@ use std::collections::{HashMap, HashSet}; use std::hash::Hash; use itertools::Itertools; +use serde::{Serialize, Deserialize}; use copager_cfg::token::TokenTag; use copager_cfg::rule::{Rule, RuleElem, RuleSet}; use copager_lex::LexSource; use copager_parse::ParseSource; -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub enum LRAction { Shift(usize), Reduce(R, usize, usize), // tag, goto_id, elems_cnt @@ -16,7 +17,7 @@ pub enum LRAction { None, } -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct LR1Configure where Sl: LexSource, diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index b4668fa..517a956 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -3,6 +3,8 @@ mod builder; use std::marker::PhantomData; +use serde::{Serialize, Deserialize}; + use copager_lex::{LexSource, LexIterator}; use copager_parse::{ParseSource, ParseIterator}; use copager_utils::cache::Cacheable; @@ -10,6 +12,7 @@ use copager_utils::cache::Cacheable; use builder::{LR1Configure, LRAction}; use error::ParseError; +#[derive(Debug)] pub struct LR1<'cache, 'input, Sl, Il, Sp> where Sl: LexSource, diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 0f040b9..3a9ffc7 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -6,3 +6,4 @@ edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true } 
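The point of requiring Serialize/Deserialize on Cacheable::Cache (the cache.rs hunk below) is that generated parser tables can be written to disk and reloaded instead of being rebuilt on every run — the same round-trip the old tests/serde.rs exercised before it was commented out. A minimal sketch of the intended usage, assuming serde_json as the storage format and the LR1/LR1Configure types from this series; MyLexSource and MyParseSource are hypothetical stand-ins for concrete source types:

    use copager_utils::cache::Cacheable;

    fn roundtrip(grammar: (MyLexSource, MyParseSource)) -> anyhow::Result<()> {
        // Build once (e.g. in build.rs) and persist the generated tables.
        let tables = LR1::new(grammar)?;             // -> anyhow::Result<Self::Cache>
        let json = serde_json::to_string(&tables)?;  // allowed by Cache: Serialize

        // Later (e.g. in main.rs): restore a parser without recomputing tables.
        let tables: LR1Configure<MyLexSource, MyParseSource> = serde_json::from_str(&json)?;
        let parser = LR1::restore(&tables);          // borrows the cached tables
        let _ = parser;
        Ok(())
    }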
diff --git a/crates/utils/src/cache.rs b/crates/utils/src/cache.rs
index 1f42e4c..e8b3374 100644
--- a/crates/utils/src/cache.rs
+++ b/crates/utils/src/cache.rs
@@ -1,8 +1,10 @@
+use serde::{Serialize, Deserialize};
+
 pub trait Cacheable<'cache, F>
 where
     Self: Sized,
 {
-    type Cache;
+    type Cache: Serialize + Deserialize<'cache>;
 
     fn new(from: F) -> anyhow::Result;
     fn restore(cache: &'cache Self::Cache) -> Self;

From c440854167a7ac3b4e9085709152c94ba7ccf87c Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Fri, 30 Aug 2024 00:21:26 +0900
Subject: [PATCH 13/55] =?UTF-8?q?[remove]=20cfg::RuleKind=20=E5=89=8A?=
 =?UTF-8?q?=E9=99=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/cfg/src/lib.rs     | 28 ----------------------------
 crates/ir/src/lib.rs      |  6 +++---
 crates/ir_sexp/src/lib.rs | 14 +++++++-------
 3 files changed, 10 insertions(+), 38 deletions(-)

diff --git a/crates/cfg/src/lib.rs b/crates/cfg/src/lib.rs
index 892e40a..9ddfc51 100644
--- a/crates/cfg/src/lib.rs
+++ b/crates/cfg/src/lib.rs
@@ -1,30 +1,2 @@
 pub mod rule;
 pub mod token;
-
-use std::hash::Hash;
-
-use token::TokenTag;
-use rule::{Rule, RuleSet};
-
-pub trait RuleKind
-where
-    Self: Clone + Hash + Eq,
-    T: TokenTag,
-{
-
-    fn into_rules(&self) -> Vec>;
-    fn into_iter() -> impl Iterator;
-
-    fn into_ruleset() -> RuleSet {
-        Self::into_iter()
-            .enumerate()
-            .flat_map(|(idx, elem)| {
-                let mut rules = Self::into_rules(&elem);
-                for rule in &mut rules {
-                    rule.id = idx;
-                }
-                rules
-            })
-            .collect::>()
-    }
-}
diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs
index 31dc628..4335f6c 100644
--- a/crates/ir/src/lib.rs
+++ b/crates/ir/src/lib.rs
@@ -1,10 +1,10 @@
 use copager_cfg::token::TokenTag;
-use copager_cfg::RuleKind;
+use copager_cfg::rule::RuleTag;
 
 pub trait IR
 where
     T: TokenTag,
-    R: RuleKind,
+    R: RuleTag,
 {
     type Builder: IRBuilder;
 }
@@ -12,7 +12,7 @@ where
 
 pub trait IRBuilder
 where
     T: TokenTag,
-    R: RuleKind,
+    R: RuleTag,
 {
     type Output: IR;
diff --git a/crates/ir_sexp/src/lib.rs b/crates/ir_sexp/src/lib.rs
index 7fe0d6c..dba3640 100644
--- a/crates/ir_sexp/src/lib.rs
+++ b/crates/ir_sexp/src/lib.rs
@@ -1,14 +1,14 @@
 use std::fmt::{Debug, Display};
 
 use copager_cfg::token::{TokenTag, Token};
-use copager_cfg::RuleKind;
+use copager_cfg::rule::RuleTag;
 use copager_ir::{IR, IRBuilder};
 
 #[derive(Debug)]
 pub enum SExp<'input, T, S>
 where
     T: TokenTag,
-    S: RuleKind,
+    S: RuleTag,
 {
     List {
         tag: S,
@@ -20,7 +20,7 @@ where
 impl Display for SExp<'_, T, S>
 where
     T: TokenTag,
-    S: RuleKind + Debug,
+    S: RuleTag + Debug,
 {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
@@ -39,7 +39,7 @@ where
 impl<'input, T, R> IR for SExp<'input, T, R>
 where
     T: TokenTag,
-    R: RuleKind,
+    R: RuleTag,
 {
     type Builder = SExpBuilder<'input, T, R>;
 }
@@ -48,7 +48,7 @@ where
 pub struct SExpBuilder<'input, T, R>
 where
     T: TokenTag,
-    R: RuleKind,
+    R: RuleTag,
 {
     stack: Vec>,
 }
@@ -56,7 +56,7 @@ where
 impl <'input, T, R> IRBuilder for SExpBuilder<'input, T, R>
 where
     T: TokenTag,
-    R: RuleKind,
+    R: RuleTag,
 {
     type Output = SExp<'input, T, R>;
 
@@ -76,7 +76,7 @@ where
 impl<'input, T, R> SExpBuilder<'input, T, R>
 where
     T: TokenTag,
-    R: RuleKind,
+    R: RuleTag,
 {
     pub fn push(&mut self, token: Token<'input, T>) {
         self.stack.push(SExp::Atom(token));

From c9fcfa29f1eab6f8aaf534b1a6fd46aafd65664d Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Fri, 30 Aug 2024 01:00:19 +0900
Subject: [PATCH 14/55] =?UTF-8?q?[change]=20{Token,Rule}Tag=20=E3=81=8C=20?=
=?UTF-8?q?Serialize&Deserialize=E3=82=92=E8=A6=81=E6=B1=82=E3=81=97?= =?UTF-8?q?=E3=81=AA=E3=81=84=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 - crates/cfg/Cargo.toml | 1 - crates/cfg/src/rule.rs | 4 +-- crates/cfg/src/token.rs | 4 +-- crates/parse_lr1/src/builder.rs | 55 ++++++++++++++++++++++++++++++++- crates/parse_lr1/src/lib.rs | 16 +++++----- 6 files changed, 65 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4fa47b6..f84a721 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,7 +41,6 @@ name = "copager_cfg" version = "0.1.1" dependencies = [ "anyhow", - "serde", "thiserror", ] diff --git a/crates/cfg/Cargo.toml b/crates/cfg/Cargo.toml index f8173f4..b710730 100644 --- a/crates/cfg/Cargo.toml +++ b/crates/cfg/Cargo.toml @@ -6,4 +6,3 @@ edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } -serde = { workspace = true } diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 038a4df..5a61f72 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -2,13 +2,11 @@ use std::collections::HashMap; use std::fmt::Debug; use std::hash::Hash; -use serde::{Serialize, Deserialize}; - use crate::token::TokenTag; pub trait RuleTag where - Self: Debug + Copy + Clone + Hash + Eq + Serialize + for<'de> Deserialize<'de>, + Self: Debug + Copy + Clone + Hash + Eq, { type TokenTag: TokenTag; diff --git a/crates/cfg/src/token.rs b/crates/cfg/src/token.rs index 911a674..1469f80 100644 --- a/crates/cfg/src/token.rs +++ b/crates/cfg/src/token.rs @@ -1,11 +1,9 @@ use std::fmt::Debug; use std::hash::Hash; -use serde::{Serialize, Deserialize}; - pub trait TokenTag where - Self: Debug + Copy + Clone + Hash + Eq + Serialize + for<'de> Deserialize<'de>, + Self: Debug + Copy + Clone + Hash + Eq, { fn as_str<'a, 'b>(&'a self) -> &'b str; } diff --git a/crates/parse_lr1/src/builder.rs b/crates/parse_lr1/src/builder.rs index 7590fee..a6fab5d 100644 --- a/crates/parse_lr1/src/builder.rs +++ b/crates/parse_lr1/src/builder.rs @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet}; use std::hash::Hash; use itertools::Itertools; +use serde::ser::SerializeStruct; use serde::{Serialize, Deserialize}; use copager_cfg::token::TokenTag; @@ -17,7 +18,7 @@ pub enum LRAction { None, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug)] pub struct LR1Configure where Sl: LexSource, @@ -28,6 +29,58 @@ where pub goto_table: Vec>, } +impl Serialize for LR1Configure +where + Sl: LexSource, + Sl::Tag: Serialize, + Sp: ParseSource, + Sp::Tag: Serialize, +{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::ser::Serializer, + { + let mut state = serializer.serialize_struct("LR1Configure", 3)?; + state.serialize_field("action_table", &self.action_table)?; + state.serialize_field("eof_action_table", &self.eof_action_table)?; + state.serialize_field("goto_table", &self.goto_table)?; + state.end() + } +} + +impl<'de, Sl, Sp> Deserialize<'de> for LR1Configure +where + Sl: LexSource, + Sl::Tag: for<'de_o> Deserialize<'de_o>, + Sp: ParseSource, + Sp::Tag: for<'de_o> Deserialize<'de_o>, +{ + fn deserialize(deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + #[derive(Deserialize)] + struct LR1ConfigureHelper + where + Sl: LexSource, + Sl::Tag: for<'de_h> Deserialize<'de_h>, + Sp: ParseSource, + Sp::Tag: for<'de_h> Deserialize<'de_h>, + { + action_table: Vec>>, + eof_action_table: Vec>, + goto_table: Vec>, + 
} + + let helper = LR1ConfigureHelper::::deserialize(deserializer)?; + Ok(LR1Configure { + action_table: helper.action_table, + eof_action_table: helper.eof_action_table, + goto_table: helper.goto_table, + }) + } +} + impl LR1Configure where Sl: LexSource, diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index 517a956..743dcd9 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -33,8 +33,10 @@ where impl<'cache, 'input, Sl, Il, Sp> Cacheable<'cache, (Sl, Sp)> for LR1<'cache, 'input, Sl, Il, Sp> where Sl: LexSource, + Sl::Tag: Serialize + for<'de> Deserialize<'de>, Il: LexIterator<'input, Sl::Tag>, Sp: ParseSource, + Sp::Tag: Serialize + for<'de> Deserialize<'de>, { type Cache = LR1Configure; @@ -43,12 +45,7 @@ where } fn restore(tables: &'cache Self::Cache) -> Self { - LR1 { - tables, - lexer: None, - stack: None, - _phantom: PhantomData, - } + Self::from(tables) } } @@ -59,7 +56,12 @@ where Sp: ParseSource, { fn from(tables: &'cache LR1Configure) -> Self { - Self::restore(tables) + LR1 { + tables, + lexer: None, + stack: None, + _phantom: PhantomData, + } } } From 7cff3a852ddaa946f77b0a00884d1769e4e524ce Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Fri, 30 Aug 2024 01:16:54 +0900 Subject: [PATCH 15/55] =?UTF-8?q?[update]=20lex=5Fregex/RegexLexer=20?= =?UTF-8?q?=E3=81=AE=20init=20=E5=87=A6=E7=90=86=E3=81=A7=20clone=20?= =?UTF-8?q?=E3=82=92=E5=91=BC=E3=81=B0=E3=81=AA=E3=81=84=E3=82=88=E3=81=86?= =?UTF-8?q?=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/lex_regex/src/lib.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index c8722e1..5465f23 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -1,3 +1,5 @@ +use std::rc::Rc; + use regex::{Regex, RegexSet}; use copager_cfg::token::{TokenTag, Token}; @@ -6,9 +8,9 @@ use copager_lex::{LexSource, LexIterator}; #[derive(Debug)] struct RegexLexer<'input, S: LexSource> { // regex - regex_istr: Regex, - regex_set: RegexSet, - regex_map: Vec<(Regex, S::Tag)>, + regex_istr: Rc, + regex_set: Rc, + regex_map: Rc>, // state input: &'input str, @@ -31,9 +33,9 @@ where .collect::>>().unwrap(); RegexLexer { - regex_istr, - regex_set, - regex_map, + regex_istr: Rc::new(regex_istr), + regex_set: Rc::new(regex_set), + regex_map: Rc::new(regex_map), input: "", pos: 0, } @@ -49,9 +51,9 @@ where fn init(&self, input: &'input str) -> Self { RegexLexer { - regex_istr: self.regex_istr.clone(), - regex_set: self.regex_set.clone(), - regex_map: self.regex_map.clone(), + regex_istr: Rc::clone(&self.regex_istr), + regex_set: Rc::clone(&self.regex_set), + regex_map: Rc::clone(&self.regex_map), input: input, pos: 0, } From 370ac2586a4810c7d9d03d303e769d272027bc8f Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Fri, 30 Aug 2024 01:21:41 +0900 Subject: [PATCH 16/55] =?UTF-8?q?[change]=20RuleTag=20=E3=81=8C=E9=96=A2?= =?UTF-8?q?=E9=80=A3=E5=9E=8B=E3=81=A8=E3=81=97=E3=81=A6=E6=8C=81=E3=81=A3?= =?UTF-8?q?=E3=81=A6=E3=81=84=E3=81=9F=20TokenTag=20=E3=82=92=E5=9E=8B?= =?UTF-8?q?=E5=A4=89=E6=95=B0=E3=81=A8=E3=81=97=E3=81=A6=E6=8C=81=E3=81=A4?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/cfg/src/rule.rs | 6 ++---- crates/ir/src/lib.rs | 4 ++-- crates/ir_sexp/src/lib.rs | 12 ++++++------ crates/parse/src/lib.rs | 
4 ++-- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 5a61f72..053b4d8 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -4,13 +4,11 @@ use std::hash::Hash; use crate::token::TokenTag; -pub trait RuleTag +pub trait RuleTag where Self: Debug + Copy + Clone + Hash + Eq, { - type TokenTag: TokenTag; - - fn as_rules<'a, 'b>(&'a self) -> Vec<&'b Rule>; + fn as_rules<'a, 'b>(&'a self) -> Vec<&'b Rule>; } #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 4335f6c..346e624 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -4,7 +4,7 @@ use copager_cfg::rule::RuleTag; pub trait IR where T: TokenTag, - R: RuleTag, + R: RuleTag, { type Builder: IRBuilder; } @@ -12,7 +12,7 @@ where pub trait IRBuilder where T: TokenTag, - R: RuleTag, + R: RuleTag, { type Output: IR; diff --git a/crates/ir_sexp/src/lib.rs b/crates/ir_sexp/src/lib.rs index dba3640..09395b5 100644 --- a/crates/ir_sexp/src/lib.rs +++ b/crates/ir_sexp/src/lib.rs @@ -8,7 +8,7 @@ use copager_ir::{IR, IRBuilder}; pub enum SExp<'input, T, S> where T: TokenTag, - S: RuleTag, + S: RuleTag, { List { tag: S, @@ -20,7 +20,7 @@ where impl Display for SExp<'_, T, S> where T: TokenTag, - S: RuleTag + Debug, + S: RuleTag + Debug, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -39,7 +39,7 @@ where impl<'input, T, R> IR for SExp<'input, T, R> where T: TokenTag, - R: RuleTag, + R: RuleTag, { type Builder = SExpBuilder<'input, T, R>; } @@ -48,7 +48,7 @@ where pub struct SExpBuilder<'input, T, R> where T: TokenTag, - R: RuleTag, + R: RuleTag, { stack: Vec>, } @@ -56,7 +56,7 @@ where impl <'input, T, R> IRBuilder for SExpBuilder<'input, T, R> where T: TokenTag, - R: RuleTag, + R: RuleTag, { type Output = SExp<'input, T, R>; @@ -76,7 +76,7 @@ where impl<'input, T, R> SExpBuilder<'input, T, R> where T: TokenTag, - R: RuleTag, + R: RuleTag, { pub fn push(&mut self, token: Token<'input, T>) { self.stack.push(SExp::Atom(token)); diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 2492fd0..a54f1d6 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -3,7 +3,7 @@ use copager_cfg::rule::{RuleTag, RuleSet}; use copager_lex::LexIterator; pub trait ParseSource { - type Tag: RuleTag; + type Tag: RuleTag; fn iter(&self) -> impl Iterator; @@ -29,7 +29,7 @@ pub trait ParseIterator<'input, T, R, Il> where Self: From, T: TokenTag, - R: RuleTag, + R: RuleTag, Il: LexIterator<'input, T>, { type From; From 1c79ec5f1ce4fbdc02cf4c15333936fbd5318808 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 16:36:17 +0900 Subject: [PATCH 17/55] =?UTF-8?q?[update]=20LexSource=20=E5=90=91=E3=81=91?= =?UTF-8?q?=20derive=20=E3=83=9E=E3=82=AF=E3=83=AD=E3=82=92=20lex=5Fderive?= =?UTF-8?q?=20=E3=81=AB=E7=A7=BB=E5=8B=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 15 ++++ Cargo.toml | 2 + crates/lex/Cargo.toml | 1 + crates/lex/src/lib.rs | 1 + crates/lex_derive/Cargo.toml | 18 ++++ crates/lex_derive/src/impl.rs | 1 + crates/lex_derive/src/impl/lex.rs | 110 ++++++++++++++++++++++++ crates/lex_derive/src/lib.rs | 9 ++ crates/lex_derive/tests/simple.rs | 27 ++++++ crates/lex_derive/tests/with_ignored.rs | 30 +++++++ 10 files changed, 214 insertions(+) create mode 100644 crates/lex_derive/Cargo.toml create mode 100644 crates/lex_derive/src/impl.rs create mode 100644 
crates/lex_derive/src/impl/lex.rs create mode 100644 crates/lex_derive/src/lib.rs create mode 100644 crates/lex_derive/tests/simple.rs create mode 100644 crates/lex_derive/tests/with_ignored.rs diff --git a/Cargo.lock b/Cargo.lock index f84a721..584cb03 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,7 @@ dependencies = [ "copager_ir", "copager_ir_sexp", "copager_lex", + "copager_lex_derive", "copager_lex_regex", "copager_parse", "copager_parse_lr1", @@ -92,10 +93,24 @@ version = "0.1.1" dependencies = [ "anyhow", "copager_cfg", + "copager_lex_derive", "copager_utils", "thiserror", ] +[[package]] +name = "copager_lex_derive" +version = "0.1.1" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "proc-macro2", + "quote", + "syn", + "thiserror", +] + [[package]] name = "copager_lex_regex" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index e2a385a..8537c20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ copager_core = { path = "./crates/core" } copager_cfg = { path = "./crates/cfg" } copager_cfg_derive = { path = "./crates/cfg_derive" } copager_lex = { path = "./crates/lex" } +copager_lex_derive = { path = "./crates/lex_derive" } copager_lex_regex = { path = "./crates/lex_regex" } copager_parse = { path = "./crates/parse" } copager_parse_lr1 = { path = "./crates/parse_lr1" } @@ -30,6 +31,7 @@ members = [ "./crates/cfg", "./crates/cfg_derive", "./crates/lex", + "./crates/lex_derive", "./crates/lex_regex", "./crates/parse", "./crates/parse_lr1", diff --git a/crates/lex/Cargo.toml b/crates/lex/Cargo.toml index 6e5eeb8..b8863f4 100644 --- a/crates/lex/Cargo.toml +++ b/crates/lex/Cargo.toml @@ -8,3 +8,4 @@ anyhow = { workspace = true } thiserror = { workspace = true } copager_cfg = { path = "../cfg" } copager_utils = { path = "../utils" } +copager_lex_derive = { path = "../lex_derive" } diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs index f3b1cb9..8d3d885 100644 --- a/crates/lex/src/lib.rs +++ b/crates/lex/src/lib.rs @@ -1,4 +1,5 @@ use copager_cfg::token::{TokenTag, Token}; +pub use copager_lex_derive::LexSource; pub trait LexSource { type Tag: TokenTag; diff --git a/crates/lex_derive/Cargo.toml b/crates/lex_derive/Cargo.toml new file mode 100644 index 0000000..2052070 --- /dev/null +++ b/crates/lex_derive/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "copager_lex_derive" +version = "0.1.1" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +proc-macro2 = "1.0" +quote = "1.0" +syn = { version ="2.0", features = ["full", "extra-traits"] } + +[dev-dependencies] +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } + +[lib] +proc-macro = true diff --git a/crates/lex_derive/src/impl.rs b/crates/lex_derive/src/impl.rs new file mode 100644 index 0000000..88eb3c4 --- /dev/null +++ b/crates/lex_derive/src/impl.rs @@ -0,0 +1 @@ +pub mod lex; diff --git a/crates/lex_derive/src/impl/lex.rs b/crates/lex_derive/src/impl/lex.rs new file mode 100644 index 0000000..34d1d81 --- /dev/null +++ b/crates/lex_derive/src/impl/lex.rs @@ -0,0 +1,110 @@ +use proc_macro2::TokenStream; +use quote::quote; +use syn::{Data, DeriveInput, Variant, Ident, LitStr}; + +pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { + let data_enum = if let Data::Enum(data_enum) = ast.data { + data_enum + } else { + panic!("\"LexSource\" proc-macro is only implemented for enum.") + }; + + let parsed_variantes = data_enum + .variants + .iter() + .map(|variant| VariantInfo::parse(&ast.ident, variant)) + .collect::>(); + + 
let enum_name = &ast.ident; + let enum_matcher_table = parsed_variantes + .iter() + .map(|variant| variant.gen_ident_matcher()); + let enum_ignored = parsed_variantes + .iter() + .find(|variant| variant.ignored) + .map(|variant| variant.text.as_ref().unwrap().as_str()) + .unwrap_or(""); + let enum_variants = parsed_variantes + .iter() + .filter(|variant| !variant.ignored) + .map(|variant| variant.gen_ident()); + + quote! { + impl TokenTag for #enum_name { + fn as_str<'a, 'b>(&'a self) -> &'b str { + match self { + #( #enum_matcher_table, )* + } + } + } + + impl LexSource for #enum_name { + type Tag = Self; + + fn ignore_token(&self) -> &'static str { + #enum_ignored + } + + fn iter(&self) -> impl Iterator { + vec![ #( #enum_variants, )* ].into_iter() + } + } + } +} + +#[derive(Debug)] +struct VariantInfo<'a> { + parent_ident: &'a Ident, + self_ident: &'a Ident, + text: Option, + ignored: bool, +} + +impl<'a> VariantInfo<'a> { + fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> { + let self_ident = &variant.ident; + + let mut text = None; + let mut ignored = false; + for attr in &variant.attrs { + let _ = attr.parse_nested_meta(|meta| { + // #[...(text = "...")] + if meta.path.is_ident("text") { + let raw_text = meta.value()?.parse::()?.value(); + text = Some(format!("^{}", raw_text)); + return Ok(()); + } + + // #[...(ignord)] + if meta.path.is_ident("ignored") { + ignored = true; + return Ok(()); + } + + Err(meta.error("Unknown attribute")) + }); + } + + VariantInfo { + parent_ident, + self_ident, + text, + ignored, + } + } + + fn gen_ident(&self) -> TokenStream { + let parent_ident = self.parent_ident; + let self_ident = self.self_ident; + + quote! { #parent_ident :: #self_ident } + } + + fn gen_ident_matcher(&self) -> TokenStream { + let ident = self.gen_ident(); + match &self.text { + Some(text) => quote! { #ident => #text }, + None => quote! 
{ #ident => unimplemented!() }, + } + } +} diff --git a/crates/lex_derive/src/lib.rs b/crates/lex_derive/src/lib.rs new file mode 100644 index 0000000..9818d98 --- /dev/null +++ b/crates/lex_derive/src/lib.rs @@ -0,0 +1,9 @@ +mod r#impl; + +use syn::{parse_macro_input, DeriveInput}; + +#[proc_macro_derive(LexSource, attributes(token))] +pub fn derive_tokenset(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let ast = parse_macro_input!(input as DeriveInput); + r#impl::lex::proc_macro_impl(ast).into() +} diff --git a/crates/lex_derive/tests/simple.rs b/crates/lex_derive/tests/simple.rs new file mode 100644 index 0000000..8b8d448 --- /dev/null +++ b/crates/lex_derive/tests/simple.rs @@ -0,0 +1,27 @@ +use copager_cfg::token::TokenTag; +use copager_lex::LexSource; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum MyToken { + #[default] + #[token(text = r"\+")] + Abc, + #[token(text = r"\-")] + Def, + #[token(text = r"[1-9]+")] + Number, +} + + +#[test] +fn check_compile_simple() { + // LexSource + let mytoken = MyToken::default(); + assert!(mytoken.ignore_token().is_empty()); + assert_eq!(mytoken.iter().count(), 3); + + // TokenTag + assert_eq!(MyToken::Abc.as_str(), r"^\+"); + assert_eq!(MyToken::Def.as_str(), r"^\-"); + assert_eq!(MyToken::Number.as_str(), r"^[1-9]+"); +} diff --git a/crates/lex_derive/tests/with_ignored.rs b/crates/lex_derive/tests/with_ignored.rs new file mode 100644 index 0000000..09d1e5e --- /dev/null +++ b/crates/lex_derive/tests/with_ignored.rs @@ -0,0 +1,30 @@ +use copager_cfg::token::TokenTag; +use copager_lex::LexSource; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum MyToken { + #[default] + #[token(text = r"\+")] + Abc, + #[token(text = r"\-")] + Def, + #[token(text = r"[1-9]+")] + Number, + #[token(text = r"[ \t\n]+", ignored)] + _WhiteSpace, +} + + +#[test] +fn check_compile_with_ignored() { + // LexSource + let mytoken = MyToken::default(); + assert_eq!(mytoken.ignore_token(), r"^[ \t\n]+"); + assert_eq!(mytoken.iter().count(), 3); + + // TokenTag + assert_eq!(MyToken::Abc.as_str(), r"^\+"); + assert_eq!(MyToken::Def.as_str(), r"^\-"); + assert_eq!(MyToken::Number.as_str(), r"^[1-9]+"); + assert_eq!(MyToken::_WhiteSpace.as_str(), r"^[ \t\n]+"); +} From 85b1d8fd6bfbbf4a21a9181f8369c85fddd83b14 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 16:44:34 +0900 Subject: [PATCH 18/55] =?UTF-8?q?[add]=20lex=20=E3=82=AF=E3=83=AC=E3=83=BC?= =?UTF-8?q?=E3=83=88=E3=81=AB=20derive=20=E3=83=95=E3=83=A9=E3=82=B0?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/lex/Cargo.toml | 6 +++++- crates/lex/src/lib.rs | 1 + crates/lex_derive/Cargo.toml | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/lex/Cargo.toml b/crates/lex/Cargo.toml index b8863f4..4f8c3f6 100644 --- a/crates/lex/Cargo.toml +++ b/crates/lex/Cargo.toml @@ -8,4 +8,8 @@ anyhow = { workspace = true } thiserror = { workspace = true } copager_cfg = { path = "../cfg" } copager_utils = { path = "../utils" } -copager_lex_derive = { path = "../lex_derive" } +copager_lex_derive = { path = "../lex_derive", optional = true } + +[features] +default = [] +derive = ["copager_lex_derive"] diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs index 8d3d885..744fa1a 100644 --- a/crates/lex/src/lib.rs +++ b/crates/lex/src/lib.rs @@ -1,4 +1,5 @@ use copager_cfg::token::{TokenTag, Token}; +#[cfg(feature = "derive")] pub 
use copager_lex_derive::LexSource; pub trait LexSource { diff --git a/crates/lex_derive/Cargo.toml b/crates/lex_derive/Cargo.toml index 2052070..dde1eb2 100644 --- a/crates/lex_derive/Cargo.toml +++ b/crates/lex_derive/Cargo.toml @@ -12,7 +12,7 @@ syn = { version ="2.0", features = ["full", "extra-traits"] } [dev-dependencies] copager_cfg = { path = "../cfg" } -copager_lex = { path = "../lex" } +copager_lex = { path = "../lex", features = ["derive"] } [lib] proc-macro = true From 83c329d6e1ab1b9b516245caa8f3810f06112f2d Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 17:24:49 +0900 Subject: [PATCH 19/55] =?UTF-8?q?[clean]=20LR1Configure=20=E3=81=B8?= =?UTF-8?q?=E3=81=AE=20Serialize,=20Deserialize=20=E3=81=AE=E5=AE=9F?= =?UTF-8?q?=E8=A3=85=E6=96=B9=E6=B3=95=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr1/src/builder.rs | 59 +++------------------------------ 1 file changed, 5 insertions(+), 54 deletions(-) diff --git a/crates/parse_lr1/src/builder.rs b/crates/parse_lr1/src/builder.rs index a6fab5d..de05ad1 100644 --- a/crates/parse_lr1/src/builder.rs +++ b/crates/parse_lr1/src/builder.rs @@ -2,7 +2,6 @@ use std::collections::{HashMap, HashSet}; use std::hash::Hash; use itertools::Itertools; -use serde::ser::SerializeStruct; use serde::{Serialize, Deserialize}; use copager_cfg::token::TokenTag; @@ -18,69 +17,21 @@ pub enum LRAction { None, } -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct LR1Configure where Sl: LexSource, Sp: ParseSource, { + #[serde(bound( + serialize = "Sl::Tag: Serialize, Sp::Tag: Serialize", + deserialize = "Sl::Tag: Deserialize<'de>, Sp::Tag: Deserialize<'de>", + ))] pub action_table: Vec>>, pub eof_action_table: Vec>, pub goto_table: Vec>, } -impl Serialize for LR1Configure -where - Sl: LexSource, - Sl::Tag: Serialize, - Sp: ParseSource, - Sp::Tag: Serialize, -{ - fn serialize(&self, serializer: S) -> Result - where - S: serde::ser::Serializer, - { - let mut state = serializer.serialize_struct("LR1Configure", 3)?; - state.serialize_field("action_table", &self.action_table)?; - state.serialize_field("eof_action_table", &self.eof_action_table)?; - state.serialize_field("goto_table", &self.goto_table)?; - state.end() - } -} - -impl<'de, Sl, Sp> Deserialize<'de> for LR1Configure -where - Sl: LexSource, - Sl::Tag: for<'de_o> Deserialize<'de_o>, - Sp: ParseSource, - Sp::Tag: for<'de_o> Deserialize<'de_o>, -{ - fn deserialize(deserializer: D) -> Result - where - D: serde::de::Deserializer<'de>, - { - #[derive(Deserialize)] - struct LR1ConfigureHelper - where - Sl: LexSource, - Sl::Tag: for<'de_h> Deserialize<'de_h>, - Sp: ParseSource, - Sp::Tag: for<'de_h> Deserialize<'de_h>, - { - action_table: Vec>>, - eof_action_table: Vec>, - goto_table: Vec>, - } - - let helper = LR1ConfigureHelper::::deserialize(deserializer)?; - Ok(LR1Configure { - action_table: helper.action_table, - eof_action_table: helper.eof_action_table, - goto_table: helper.goto_table, - }) - } -} - impl LR1Configure where Sl: LexSource, From a3ab0494d32b10bc7aed9e3dc928809f41bda89d Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 18:04:05 +0900 Subject: [PATCH 20/55] =?UTF-8?q?[update]=20ParseSource=20=E5=90=91?= =?UTF-8?q?=E3=81=91=20derive=20=E3=83=9E=E3=82=AF=E3=83=AD=E3=82=92=20par?= =?UTF-8?q?se=5Fdrive=20=E3=81=AB=E7=A7=BB=E5=8B=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 16 ++++ Cargo.toml | 
2 + crates/cfg/src/rule.rs | 2 +- crates/parse/Cargo.toml | 5 ++ crates/parse/src/lib.rs | 3 +- crates/parse_derive/Cargo.toml | 19 +++++ crates/parse_derive/src/impl.rs | 1 + crates/parse_derive/src/impl/rule.rs | 117 +++++++++++++++++++++++++++ crates/parse_derive/src/lib.rs | 9 +++ crates/parse_derive/tests/simple.rs | 40 +++++++++ 10 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 crates/parse_derive/Cargo.toml create mode 100644 crates/parse_derive/src/impl.rs create mode 100644 crates/parse_derive/src/impl/rule.rs create mode 100644 crates/parse_derive/src/lib.rs create mode 100644 crates/parse_derive/tests/simple.rs diff --git a/Cargo.lock b/Cargo.lock index 584cb03..ecddf92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,6 +31,7 @@ dependencies = [ "copager_lex_derive", "copager_lex_regex", "copager_parse", + "copager_parse_derive", "copager_parse_lr1", "copager_utils", "serde", @@ -131,10 +132,25 @@ dependencies = [ "anyhow", "copager_cfg", "copager_lex", + "copager_parse_derive", "copager_utils", "thiserror", ] +[[package]] +name = "copager_parse_derive" +version = "0.1.1" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_lex", + "copager_parse", + "proc-macro2", + "quote", + "syn", + "thiserror", +] + [[package]] name = "copager_parse_lr1" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index 8537c20..2d2168d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ copager_lex = { path = "./crates/lex" } copager_lex_derive = { path = "./crates/lex_derive" } copager_lex_regex = { path = "./crates/lex_regex" } copager_parse = { path = "./crates/parse" } +copager_parse_derive = { path = "./crates/parse_derive" } copager_parse_lr1 = { path = "./crates/parse_lr1" } copager_ir = { path = "./crates/ir" } copager_ir_sexp = { path = "./crates/ir_sexp" } @@ -34,6 +35,7 @@ members = [ "./crates/lex_derive", "./crates/lex_regex", "./crates/parse", + "./crates/parse_derive", "./crates/parse_lr1", "./crates/ir", "./crates/ir_sexp", diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 053b4d8..2f1bd24 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -8,7 +8,7 @@ pub trait RuleTag where Self: Debug + Copy + Clone + Hash + Eq, { - fn as_rules<'a, 'b>(&'a self) -> Vec<&'b Rule>; + fn as_rules(&self) -> Vec>; } #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/crates/parse/Cargo.toml b/crates/parse/Cargo.toml index 89ca74d..1d13812 100644 --- a/crates/parse/Cargo.toml +++ b/crates/parse/Cargo.toml @@ -8,4 +8,9 @@ anyhow = { workspace = true } thiserror = { workspace = true } copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } +copager_parse_derive = { path = "../parse_derive", optional = true } copager_utils = { path = "../utils" } + +[features] +default = [] +derive = ["copager_parse_derive"] diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index a54f1d6..5f07a94 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -1,6 +1,8 @@ use copager_cfg::token::TokenTag; use copager_cfg::rule::{RuleTag, RuleSet}; use copager_lex::LexIterator; +#[cfg(feature = "derive")] +pub use copager_parse_derive::ParseSource; pub trait ParseSource { type Tag: RuleTag; @@ -17,7 +19,6 @@ pub trait ParseSource { rule }) }; - self.iter() .enumerate() .flat_map(set_id_for_all) diff --git a/crates/parse_derive/Cargo.toml b/crates/parse_derive/Cargo.toml new file mode 100644 index 0000000..f923437 --- /dev/null +++ b/crates/parse_derive/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = 
"copager_parse_derive" +version = "0.1.1" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +proc-macro2 = "1.0" +quote = "1.0" +syn = { version ="2.0", features = ["full", "extra-traits"] } + +[dev-dependencies] +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex", features = ["derive"] } +copager_parse = { path = "../parse", features = ["derive"] } + +[lib] +proc-macro = true diff --git a/crates/parse_derive/src/impl.rs b/crates/parse_derive/src/impl.rs new file mode 100644 index 0000000..90d8760 --- /dev/null +++ b/crates/parse_derive/src/impl.rs @@ -0,0 +1 @@ +pub mod rule; diff --git a/crates/parse_derive/src/impl/rule.rs b/crates/parse_derive/src/impl/rule.rs new file mode 100644 index 0000000..08b722e --- /dev/null +++ b/crates/parse_derive/src/impl/rule.rs @@ -0,0 +1,117 @@ +use proc_macro2::TokenStream; +use quote::quote; +use syn::{Data, DeriveInput, Variant, Ident, LitStr}; + +pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { + let data_enum = if let Data::Enum(data_enum) = ast.data { + data_enum + } else { + panic!("\"ParseResource\" proc-macro is only implemented for enum.") + }; + + let parsed_variantes = data_enum + .variants + .iter() + .map(|variant| VariantInfo::parse(&ast.ident, variant)) + .collect::>(); + + let enum_name = &ast.ident; + let enum_matcher_table = parsed_variantes + .iter() + .map(|variant| variant.gen_ident_matcher()); + let enum_assoc_type = format!("{}", enum_name) + .replace("Rule", "Token") + .parse::() + .unwrap(); + let enum_variants = parsed_variantes + .iter() + .map(|variant| variant.gen_ident()); + + quote! { + impl RuleTag<#enum_assoc_type> for #enum_name { + fn as_rules(&self) -> Vec> { + match self { + #( #enum_matcher_table, )* + } + } + } + + impl ParseSource<#enum_assoc_type> for #enum_name { + type Tag = Self; + + fn iter(&self) -> impl Iterator { + vec![ #( #enum_variants, )* ].into_iter() + } + } + } +} + +struct VariantInfo<'a> { + parent_ident: &'a Ident, + self_ident: &'a Ident, + rules: Vec, +} + +impl<'a> VariantInfo<'a> { + fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> { + let self_ident = &variant.ident; + let token_ident = format!("{}", parent_ident) + .replace("Rule", "Token") + .parse::() + .unwrap(); + + let mut rules = vec![]; + for attr in &variant.attrs { + if attr.path().is_ident("rule") { + let attr = attr.parse_args::().unwrap().value(); + rules.push(parse_rule(&token_ident, &attr)); + } + } + + VariantInfo { + parent_ident, + self_ident, + rules, + } + } + + fn gen_ident(&self) -> TokenStream { + let parent_ident = self.parent_ident; + let self_ident = self.self_ident; + + quote! { #parent_ident :: #self_ident } + } + + fn gen_ident_matcher(&self) -> TokenStream { + let ident = self.gen_ident(); + if self.rules.is_empty() { + quote! { #ident => unimplemented!() } + } else { + let rules = &self.rules; + quote! { #ident => vec![#(#rules),*] } + } + } +} + +fn parse_rule(token: &TokenStream, input: &str) -> TokenStream { + let mut splitted = input.split("::="); + + let lhs = splitted.next().unwrap().trim(); + let lhs = &lhs[1..lhs.len() - 1]; + let lhs = quote! { RuleElem::new_nonterm(#lhs) }; + + let rhs = splitted.collect::() + .split_whitespace() + .map(|elem| { + if elem.starts_with('<') { + let elem = &elem[1..elem.len() - 1]; + quote! { RuleElem::new_nonterm(#elem) } + } else { + let ident = elem.parse::().unwrap(); + quote! { RuleElem::new_term(#token::#ident) } + } + }) + .collect::>(); + + quote! 
{ Rule::from((#lhs, vec![ #( #rhs, )* ])) } +} diff --git a/crates/parse_derive/src/lib.rs b/crates/parse_derive/src/lib.rs new file mode 100644 index 0000000..864b4fc --- /dev/null +++ b/crates/parse_derive/src/lib.rs @@ -0,0 +1,9 @@ +mod r#impl; + +use syn::{parse_macro_input, DeriveInput}; + +#[proc_macro_derive(ParseSource, attributes(rule))] +pub fn derive_parse_source(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let ast = parse_macro_input!(input as DeriveInput); + r#impl::rule::proc_macro_impl(ast).into() +} diff --git a/crates/parse_derive/tests/simple.rs b/crates/parse_derive/tests/simple.rs new file mode 100644 index 0000000..4864d3b --- /dev/null +++ b/crates/parse_derive/tests/simple.rs @@ -0,0 +1,40 @@ +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_cfg::token::TokenTag; +use copager_lex::LexSource; +use copager_parse::ParseSource; + +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum MyToken { + #[token(text = r"\+")] + Plus, + #[token(text = r"\-")] + Minus, + #[token(text = r"[1-9]+")] + Number, +} + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum MyRule { + #[default] + #[rule(" ::= Plus Number")] + #[rule(" ::= Minus Number")] + #[rule(" ::= Number")] + Expr, +} + +#[test] +fn check_compile_simple() { + // ParseSource + let myrule = MyRule::default(); + assert_eq!(myrule.iter().count(), 1); + + // RuleTag + let rules = MyRule::Expr.as_rules(); + assert_eq!(rules.len(), 3); + assert_eq!(rules[0].lhs, RuleElem::new_nonterm("expr")); + assert_eq!(rules[0].rhs, vec![RuleElem::new_nonterm("expr"), RuleElem::new_term(MyToken::Plus), RuleElem::new_term(MyToken::Number)]); + assert_eq!(rules[1].lhs, RuleElem::new_nonterm("expr")); + assert_eq!(rules[1].rhs, vec![RuleElem::new_nonterm("expr"), RuleElem::new_term(MyToken::Minus), RuleElem::new_term(MyToken::Number)]); + assert_eq!(rules[2].lhs, RuleElem::new_nonterm("expr")); + assert_eq!(rules[2].rhs, vec![RuleElem::new_term(MyToken::Number)]); +} From 0a7dc16ccb210ecd70d9d317a7ce53f3c197a991 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 18:05:55 +0900 Subject: [PATCH 21/55] =?UTF-8?q?[remove]=20cfg=5Fderive=20=E3=82=AF?= =?UTF-8?q?=E3=83=AC=E3=83=BC=E3=83=88=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 12 --- Cargo.toml | 2 - crates/cfg_derive/Cargo.toml | 14 ---- crates/cfg_derive/src/impl.rs | 2 - crates/cfg_derive/src/impl/rule.rs | 112 ---------------------------- crates/cfg_derive/src/impl/token.rs | 110 --------------------------- crates/cfg_derive/src/lib.rs | 15 ---- 7 files changed, 267 deletions(-) delete mode 100644 crates/cfg_derive/Cargo.toml delete mode 100644 crates/cfg_derive/src/impl.rs delete mode 100644 crates/cfg_derive/src/impl/rule.rs delete mode 100644 crates/cfg_derive/src/impl/token.rs delete mode 100644 crates/cfg_derive/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index ecddf92..dd41696 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,7 +23,6 @@ version = "0.1.1" dependencies = [ "anyhow", "copager_cfg", - "copager_cfg_derive", "copager_core", "copager_ir", "copager_ir_sexp", @@ -46,17 +45,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "copager_cfg_derive" -version = "0.1.1" -dependencies = [ - "anyhow", - "proc-macro2", - "quote", - "syn", - "thiserror", -] - [[package]] name = "copager_core" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index 2d2168d..063d259 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] copager_core = { path = "./crates/core" } copager_cfg = { path = "./crates/cfg" } -copager_cfg_derive = { path = "./crates/cfg_derive" } copager_lex = { path = "./crates/lex" } copager_lex_derive = { path = "./crates/lex_derive" } copager_lex_regex = { path = "./crates/lex_regex" } @@ -30,7 +29,6 @@ resolver = "2" members = [ "./crates/core", "./crates/cfg", - "./crates/cfg_derive", "./crates/lex", "./crates/lex_derive", "./crates/lex_regex", diff --git a/crates/cfg_derive/Cargo.toml b/crates/cfg_derive/Cargo.toml deleted file mode 100644 index 810a51d..0000000 --- a/crates/cfg_derive/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "copager_cfg_derive" -version = "0.1.1" -edition = "2021" - -[dependencies] -anyhow = { workspace = true } -thiserror = { workspace = true } -proc-macro2 = "1.0" -quote = "1.0" -syn = { version ="2.0", features = ["full", "extra-traits"] } - -[lib] -proc-macro = true diff --git a/crates/cfg_derive/src/impl.rs b/crates/cfg_derive/src/impl.rs deleted file mode 100644 index f89ee34..0000000 --- a/crates/cfg_derive/src/impl.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod token; -pub mod rule; diff --git a/crates/cfg_derive/src/impl/rule.rs b/crates/cfg_derive/src/impl/rule.rs deleted file mode 100644 index dd0eff9..0000000 --- a/crates/cfg_derive/src/impl/rule.rs +++ /dev/null @@ -1,112 +0,0 @@ -use proc_macro2::TokenStream; -use quote::quote; -use syn::{Data, DeriveInput, Variant, Ident, LitStr}; - -pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { - let data_enum = if let Data::Enum(data_enum) = ast.data { - data_enum - } else { - panic!("\"RuleKind\" proc-macro is only implemented for enum.") - }; - - let parsed_variantes = data_enum - .variants - .iter() - .map(|variant| VariantInfo::parse(&ast.ident, variant)) - .collect::>(); - - let enum_name = &ast.ident; - let enum_assoc_type = format!("{}", enum_name) - .replace("Rule", "Token") - .parse::() - .unwrap(); - let enum_variants = parsed_variantes - .iter() - .map(|variant| variant.gen_ident()); - let enum_rule_table = parsed_variantes - .iter() - .map(|variant| variant.gen_ident_with_rule()); - - quote! { - impl<'a> Syntax<'a> for #enum_name { - type TokenKind = #enum_assoc_type; - - fn into_rules(&self) -> Vec> { - match self { - #( #enum_rule_table, )* - _ => unimplemented!(), - } - } - - fn into_iter() -> impl Iterator { - vec![ - #( #enum_variants, )* - ].into_iter() - } - } - } -} - -struct VariantInfo<'a> { - parent_ident: &'a Ident, - self_ident: &'a Ident, - rules: Vec, -} - -impl<'a> VariantInfo<'a> { - fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> { - let self_ident = &variant.ident; - - let mut rules = vec![]; - for attr in &variant.attrs { - let attr = attr.parse_args::().unwrap().value(); - rules.push(Self::parse_rule(&attr)); - } - - VariantInfo { - parent_ident, - self_ident, - rules, - } - } - - fn parse_rule(s: &str) -> TokenStream { - let mut splitted = s.split("::="); - - let lhs = splitted.next().unwrap().trim(); - let lhs = &lhs[1..lhs.len() - 1]; - let lhs = quote! { RuleElem::new_nonterm(#lhs) }; - - let rhs = splitted.collect::() - .split_whitespace() - .map(|s| { - if s.starts_with('<') { - let s = &s[1..s.len() - 1]; - quote! { RuleElem::new_nonterm(#s) } - } else { - let ident = s.parse::().unwrap(); - quote! { RuleElem::new_term(Self::TokenSet::#ident) } - } - }) - .collect::>(); - - quote! 
{ Rule::from((#lhs, vec![ #( #rhs, )* ])) } - } - - fn gen_ident(&self) -> TokenStream { - let parent_ident = self.parent_ident; - let self_ident = self.self_ident; - - quote! { #parent_ident :: #self_ident } - } - - fn gen_ident_with_rule(&self) -> TokenStream { - let ident = self.gen_ident(); - if self.rules.is_empty() { - quote! { #ident => unimplemented!() } - } else { - let rules = &self.rules; - quote! { #ident => vec![#(#rules),*] } - } - } -} diff --git a/crates/cfg_derive/src/impl/token.rs b/crates/cfg_derive/src/impl/token.rs deleted file mode 100644 index f092ab3..0000000 --- a/crates/cfg_derive/src/impl/token.rs +++ /dev/null @@ -1,110 +0,0 @@ -use proc_macro2::TokenStream; -use quote::quote; -use syn::{Data, DeriveInput, Variant, Ident, LitStr}; - -pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { - let data_enum = if let Data::Enum(data_enum) = ast.data { - data_enum - } else { - panic!("\"TokenKind\" proc-macro is only implemented for enum.") - }; - - let parsed_variantes = data_enum - .variants - .iter() - .map(|variant| VariantInfo::parse(&ast.ident, variant)) - .collect::>(); - - let enum_name = &ast.ident; - let enum_ignored = parsed_variantes - .iter() - .find(|variant| variant.ignored) - .map(|variant| variant.regex.as_ref().unwrap().as_str()) - .unwrap_or(""); - let enum_variants = parsed_variantes - .iter() - .filter(|variant| !variant.ignored) - .map(|variant| variant.gen_ident()); - let enum_regex_table = parsed_variantes - .iter() - .filter(|variant| !variant.ignored) - .map(|variant| variant.gen_ident_with_regex()); - - quote! { - impl TokenKind<'_> for #enum_name { - fn as_str(&self) -> &'static str { - match self { - #( #enum_regex_table, )* - _ => unimplemented!(), - } - } - - fn ignore_str() -> &'static str { - #enum_ignored - } - - fn into_iter() -> impl Iterator { - vec![ - #( #enum_variants, )* - ].into_iter() - } - } - } -} - -#[derive(Debug)] -struct VariantInfo<'a> { - parent_ident: &'a Ident, - self_ident: &'a Ident, - regex: Option, - ignored: bool, -} - -impl<'a> VariantInfo<'a> { - fn parse(parent_ident: &'a Ident, variant: &'a Variant) -> VariantInfo<'a> { - let self_ident = &variant.ident; - - let mut regex = None; - let mut ignored = false; - for attr in &variant.attrs { - let _ = attr.parse_nested_meta(|meta| { - // #[...(text = "...")] - if meta.path.is_ident("text") { - let raw_regex = meta.value()?.parse::()?.value(); - regex = Some(format!("^{}", raw_regex)); - return Ok(()); - } - - // #[...(ignord)] - if meta.path.is_ident("ignored") { - ignored = true; - return Ok(()); - } - - Err(meta.error("Unknown attribute")) - }); - } - - VariantInfo { - parent_ident, - self_ident, - regex, - ignored, - } - } - - fn gen_ident(&self) -> TokenStream { - let parent_ident = self.parent_ident; - let self_ident = self.self_ident; - - quote! { #parent_ident :: #self_ident } - } - - fn gen_ident_with_regex(&self) -> TokenStream { - let ident = self.gen_ident(); - match &self.regex { - Some(regex) => quote! { #ident => #regex }, - None => quote! 
{ unimplemented!() }, - } - } -} diff --git a/crates/cfg_derive/src/lib.rs b/crates/cfg_derive/src/lib.rs deleted file mode 100644 index a1bb051..0000000 --- a/crates/cfg_derive/src/lib.rs +++ /dev/null @@ -1,15 +0,0 @@ -mod r#impl; - -use syn::{parse_macro_input, DeriveInput}; - -#[proc_macro_derive(TokenKind, attributes(token))] -pub fn derive_tokenset(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let ast = parse_macro_input!(input as DeriveInput); - r#impl::token::proc_macro_impl(ast).into() -} - -#[proc_macro_derive(RuleKind, attributes(rule))] -pub fn derive_syntax(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let ast = parse_macro_input!(input as DeriveInput); - r#impl::rule::proc_macro_impl(ast).into() -} From 6d14a0b625976e1b6ec0097363f419fadd1ff0ff Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 18:20:52 +0900 Subject: [PATCH 22/55] =?UTF-8?q?[update]=20copager=20=E3=82=AF=E3=83=AC?= =?UTF-8?q?=E3=83=BC=E3=83=88=E7=9B=B4=E4=B8=8B=E3=81=AE=E3=83=A2=E3=82=B8?= =?UTF-8?q?=E3=83=A5=E3=83=BC=E3=83=AB=E6=A7=8B=E6=88=90=E3=82=92=E6=9B=B4?= =?UTF-8?q?=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 3 --- Cargo.toml | 26 +++++++++++++++++--------- crates/lex_regex/src/lib.rs | 2 +- src/lib.rs | 29 +++++++++++++++++++++++++---- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd41696..2823f4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,12 +27,9 @@ dependencies = [ "copager_ir", "copager_ir_sexp", "copager_lex", - "copager_lex_derive", "copager_lex_regex", "copager_parse", - "copager_parse_derive", "copager_parse_lr1", - "copager_utils", "serde", "serde_json", ] diff --git a/Cargo.toml b/Cargo.toml index 063d259..8b2ad2e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,15 +6,12 @@ edition = "2021" [dependencies] copager_core = { path = "./crates/core" } copager_cfg = { path = "./crates/cfg" } -copager_lex = { path = "./crates/lex" } -copager_lex_derive = { path = "./crates/lex_derive" } -copager_lex_regex = { path = "./crates/lex_regex" } -copager_parse = { path = "./crates/parse" } -copager_parse_derive = { path = "./crates/parse_derive" } -copager_parse_lr1 = { path = "./crates/parse_lr1" } +copager_lex = { path = "./crates/lex", optional = true } +copager_lex_regex = { path = "./crates/lex_regex", optional = true } +copager_parse = { path = "./crates/parse", optional = true } +copager_parse_lr1 = { path = "./crates/parse_lr1", optional = true } copager_ir = { path = "./crates/ir" } -copager_ir_sexp = { path = "./crates/ir_sexp" } -copager_utils = { path = "./crates/utils" } +copager_ir_sexp = { path = "./crates/ir_sexp", optional = true } [dev-dependencies] anyhow = { workspace = true } @@ -22,7 +19,18 @@ serde = { workspace = true } serde_json = "1.0.117" [features] -default = [] +# common +default = ["copager_lex", "copager_parse"] +derive = ["copager_lex/derive", "copager_parse/derive"] + +# lex +regexlex = ["copager_lex_regex"] + +# parse +lr1 = ["copager_parse_lr1"] + +# ir +sexp = ["copager_ir_sexp"] [workspace] resolver = "2" diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index 5465f23..81bbefa 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -6,7 +6,7 @@ use copager_cfg::token::{TokenTag, Token}; use copager_lex::{LexSource, LexIterator}; #[derive(Debug)] -struct RegexLexer<'input, S: LexSource> { +pub struct RegexLexer<'input, S: LexSource> { // regex 
regex_istr: Rc, regex_set: Rc, diff --git a/src/lib.rs b/src/lib.rs index f33b1ed..e8025e3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,25 @@ -// pub use copager_core::*; -// pub use copager_cfg as cfg; -// pub use copager_lex as lex; -// pub use copager_algorithm as algorithm; +pub use copager_core::*; +pub use copager_cfg as cfg; + +pub mod lex { + pub use copager_lex::*; + #[cfg(feature = "regexlex")] + pub use copager_lex_regex::*; +} + +pub mod parse { + pub use copager_parse::*; + #[cfg(feature = "lr1")] + pub use copager_parse_lr1::*; +} + +pub mod ir { + pub use copager_ir::*; + #[cfg(feature = "sexp")] + pub use copager_ir_sexp::*; +} + +pub mod prelude { + pub use copager_cfg::rule::{RuleTag, Rule, RuleElem}; + pub use copager_cfg::token::TokenTag; +} From c396e4a03fdef21a2421fd9dbf297ae2e8363a90 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 18:36:30 +0900 Subject: [PATCH 23/55] [update] examples/ tests/ --- Cargo.lock | 1 + Cargo.toml | 2 + examples/expr.rs | 97 +++++++++++++++++++----------------------------- src/lib.rs | 6 +-- tests/derive.rs | 77 +++++++++++++++++++------------------- 5 files changed, 83 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2823f4d..f8aa610 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,6 +22,7 @@ name = "copager" version = "0.1.1" dependencies = [ "anyhow", + "copager", "copager_cfg", "copager_core", "copager_ir", diff --git a/Cargo.toml b/Cargo.toml index 8b2ad2e..9db0470 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,11 +17,13 @@ copager_ir_sexp = { path = "./crates/ir_sexp", optional = true } anyhow = { workspace = true } serde = { workspace = true } serde_json = "1.0.117" +copager = { path = ".", features = ["derive", "all"] } [features] # common default = ["copager_lex", "copager_parse"] derive = ["copager_lex/derive", "copager_parse/derive"] +all = ["copager_lex_regex", "copager_parse_lr1", "copager_ir_sexp"] # lex regexlex = ["copager_lex_regex"] diff --git a/examples/expr.rs b/examples/expr.rs index 6671500..5b4a6f7 100644 --- a/examples/expr.rs +++ b/examples/expr.rs @@ -1,61 +1,42 @@ -// use std::io::stdin; +use copager::lex::LexSource; +use copager::parse::ParseSource; +use copager::prelude::*; -// use copager::algorithm::LR1; -// use copager::cfg::*; -// use copager::error::ParseError; -// use copager::Parser; +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum ExprToken { + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} -// #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] -// enum ExprTokenSet { -// #[token(regex = r"\+")] -// Plus, -// #[token(regex = r"-")] -// Minus, -// #[token(regex = r"\*")] -// Mul, -// #[token(regex = r"/")] -// Div, -// #[token(regex = r"\(")] -// BracketL, -// #[token(regex = r"\)")] -// BracketR, -// #[token(regex = r"[1-9][0-9]*")] -// Num, -// #[token(regex = r"[ \t\n]+", ignored)] -// _Whitespace, -// } +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum ExprRule { + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} -// #[derive(Debug, Clone, Copy, Syntax)] -// enum ExprSyntax { -// #[rule(" 
::= Plus ")] -// #[rule(" ::= Minus ")] -// #[rule(" ::= ")] -// Expr, -// #[rule(" ::= Mul ")] -// #[rule(" ::= Div ")] -// #[rule(" ::= ")] -// Term, -// #[rule(" ::= BracketL BracketR")] -// #[rule(" ::= Num")] -// Num, -// } - -// type ExprParser<'a> = Parser::<'a, LR1<'a, ExprTokenSet, ExprSyntax>>; - -// fn main() -> anyhow::Result<()> { -// let mut input = String::new(); -// stdin().read_line(&mut input)?; - -// match ExprParser::new()?.parse(&input) { -// Ok(sexp) => println!("Accepted : {}", sexp), -// Err(e) => { -// if let Some(e) = e.downcast_ref::() { -// e.pretty_print(); -// } -// println!("Rejected : {}", e); -// } -// }; - -// Ok(()) -// } -fn main() {} +fn main() -> anyhow::Result<()> { + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index e8025e3..bb5c5c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,19 +3,19 @@ pub use copager_cfg as cfg; pub mod lex { pub use copager_lex::*; - #[cfg(feature = "regexlex")] + #[cfg(any(feature = "all", feature = "regexlex"))] pub use copager_lex_regex::*; } pub mod parse { pub use copager_parse::*; - #[cfg(feature = "lr1")] + #[cfg(any(feature = "all", feature = "lr1"))] pub use copager_parse_lr1::*; } pub mod ir { pub use copager_ir::*; - #[cfg(feature = "sexp")] + #[cfg(any(feature = "all", feature = "sexp"))] pub use copager_ir_sexp::*; } diff --git a/tests/derive.rs b/tests/derive.rs index 196cd67..07d512b 100644 --- a/tests/derive.rs +++ b/tests/derive.rs @@ -1,42 +1,41 @@ -// use copager::cfg::*; +use copager::lex::LexSource; +use copager::parse::ParseSource; +use copager::prelude::*; -// #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] -// enum TestTokenSet { -// #[token(regex = r"\+")] -// Plus, -// #[token(regex = r"-")] -// Minus, -// #[token(regex = r"\*")] -// Mul, -// #[token(regex = r"/")] -// Div, -// #[token(regex = r"\(")] -// BracketL, -// #[token(regex = r"\)")] -// BracketR, -// #[token(regex = r"[1-9][0-9]*")] -// Num, -// #[token(regex = r"[ \t\n]+", ignored)] -// _Whitespace, -// } +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum ExprToken { + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} -// #[derive(Debug, Clone, Copy, Syntax)] -// enum TestSyntax { -// #[rule(" ::= Plus ")] -// #[rule(" ::= Minus ")] -// #[rule(" ::= ")] -// Expr, -// #[rule(" ::= Mul ")] -// #[rule(" ::= Div ")] -// #[rule(" ::= ")] -// Term, -// #[rule(" ::= BracketL BracketR")] -// #[rule(" ::= Num")] -// Num, -// } +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum ExprRule { + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} -// #[test] -// fn check_compile() { -// let _ = TestTokenSet::into_regex(&self::TestTokenSet::Plus); -// let _ = TestSyntax::into_rules(&self::TestSyntax::Expr); -// } +#[test] +fn check_compile() {} From 33f3c8aee8e839787a4f3304c9a62837ca537feb Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 21:48:47 +0900 Subject: [PATCH 24/55] [change] 2021 edition -> 2024 edition --- Cargo.toml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9db0470..0e1b5e9 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -1,7 +1,9 @@ +cargo-features = ["edition2024"] + [package] name = "copager" version = "0.1.1" -edition = "2021" +edition = "2024" [dependencies] copager_core = { path = "./crates/core" } @@ -21,18 +23,18 @@ copager = { path = ".", features = ["derive", "all"] } [features] # common -default = ["copager_lex", "copager_parse"] +default = ["dep:copager_lex", "dep:copager_parse"] derive = ["copager_lex/derive", "copager_parse/derive"] -all = ["copager_lex_regex", "copager_parse_lr1", "copager_ir_sexp"] +all = ["dep:copager_lex_regex", "dep:copager_parse_lr1", "dep:copager_ir_sexp"] # lex -regexlex = ["copager_lex_regex"] +regexlex = ["dep:copager_lex_regex"] # parse -lr1 = ["copager_parse_lr1"] +lr1 = ["dep:copager_parse_lr1"] # ir -sexp = ["copager_ir_sexp"] +sexp = ["dep:copager_ir_sexp"] [workspace] resolver = "2" From 1ae65d2daa2bbfb65c2dbf1af75aa8e0c174a454 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 22:19:08 +0900 Subject: [PATCH 25/55] [update] {Lex, Parse}Iterator -> {Lex, Parse}Driver --- crates/core/src/lib.rs | 29 ++++++++--------- crates/lex/src/lib.rs | 5 ++- crates/lex_regex/src/lib.rs | 65 +++++++++++++++++-------------------- crates/parse/src/lib.rs | 11 +++---- crates/parse_lr1/src/lib.rs | 51 +++++++++++------------------ 5 files changed, 70 insertions(+), 91 deletions(-) diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 15a7099..863cb28 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -2,41 +2,40 @@ pub mod error; use std::marker::PhantomData; -use copager_lex::{LexSource, LexIterator}; -use copager_parse::{ParseSource, ParseIterator}; +use copager_lex::{LexSource, LexDriver}; +use copager_parse::{ParseSource, ParseDriver}; -pub struct Processor<'input, Sl, Il, Sp, Ip> +pub struct Processor<'input, Sl, Dl, Sp, Dp> where Sl: LexSource, - Il: LexIterator<'input, Sl::Tag>, + Dl: LexDriver<'input, Sl::Tag>, Sp: ParseSource, - Ip: ParseIterator<'input, Sl::Tag, Sp::Tag, Il>, + Dp: ParseDriver<'input, Sl::Tag, Sp::Tag>, { _phantom_sl: PhantomData, - _phantom_il: PhantomData, + _phantom_il: PhantomData
, _phantom_sp: PhantomData, - _phantom_ip: PhantomData, + _phantom_ip: PhantomData, _phantom_input: PhantomData<&'input ()>, } -impl<'input, 'cache, Sl, Il, Sp, Ip> Processor<'input, Sl, Il, Sp, Ip> +impl<'input, 'cache, Sl, Dl, Sp, Dp> Processor<'input, Sl, Dl, Sp, Dp> where Sl: LexSource, - Il: LexIterator<'input, Sl::Tag, From = Sl>, + Dl: LexDriver<'input, Sl::Tag, From = Sl>, Sp: ParseSource, - Ip: ParseIterator<'input, Sl::Tag, Sp::Tag, Il, From = Sp>, + Dp: ParseDriver<'input, Sl::Tag, Sp::Tag, From = Sp>, { pub fn process(input: &'input str) where Sl: Default, Sp: Default, { - let lexer = Il::from(Sl::default()).init(input); - let mut parser = Ip::from(Sp::default()).init(lexer); + let lexer = Dl::from(Sl::default()); + let parser = Dp::from(Sp::default()); loop { - match parser.next() { - Some(_) => {} - None => break, + for _ in parser.init(lexer.init(input)) { + println!("-----"); } } } diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs index 744fa1a..939f000 100644 --- a/crates/lex/src/lib.rs +++ b/crates/lex/src/lib.rs @@ -9,13 +9,12 @@ pub trait LexSource { fn iter(&self) -> impl Iterator; } -pub trait LexIterator<'input, T> +pub trait LexDriver<'input, T> where Self: Sized + From, T: TokenTag, { type From; - fn init(&self, input: &'input str) -> Self; - fn next(&mut self) -> Option>; + fn init(&self, input: &'input str) -> impl Iterator>; } diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index 81bbefa..77a28b3 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -1,9 +1,11 @@ +#![feature(gen_blocks)] + use std::rc::Rc; use regex::{Regex, RegexSet}; use copager_cfg::token::{TokenTag, Token}; -use copager_lex::{LexSource, LexIterator}; +use copager_lex::{LexSource, LexDriver}; #[derive(Debug)] pub struct RegexLexer<'input, S: LexSource> { @@ -42,48 +44,41 @@ where } } -impl<'input, T, S> LexIterator<'input, T> for RegexLexer<'input, S> +impl<'input, T, S> LexDriver<'input, T> for RegexLexer<'input, S> where T: TokenTag, S: LexSource, { type From = S; - fn init(&self, input: &'input str) -> Self { - RegexLexer { - regex_istr: Rc::clone(&self.regex_istr), - regex_set: Rc::clone(&self.regex_set), - regex_map: Rc::clone(&self.regex_map), - input: input, - pos: 0, - } - } - - fn next(&mut self) -> Option> { - // Skip Spaces - let remain = match self.regex_istr.find(&self.input[self.pos..]) { - Some(acc_s) => { - self.pos += acc_s.len(); - &self.input[self.pos..] - } - None => &self.input[self.pos..] - }; + gen fn init(&self, input: &'input str) -> impl Iterator> { + let pos = 0; + loop { + // Skip Spaces + let remain = match self.regex_istr.find(&input[pos..]) { + Some(acc_s) => { + self.pos += acc_s.len(); + &input[pos..] + } + None => &input[pos..] 
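One detail of the hunk above worth noting: the candidate matches are sorted by ascending length and the first entry is taken, so among overlapping candidates the shortest match wins, and the unanchored `find` can also hit text past the current position. A maximal-munch lexer (the more common convention) takes the longest match anchored at the current position. A self-contained sketch of that selection rule, using the same `regex` crate but toy patterns rather than copager's real tables:

    use regex::{Regex, RegexSet};

    // Pick the longest match anchored at the current position (maximal munch).
    // `patterns` and `set` must be built from the same pattern list.
    fn longest_match<'a>(patterns: &[Regex], set: &RegexSet, rest: &'a str) -> Option<(usize, &'a str)> {
        set.matches(rest)
            .into_iter()
            .filter_map(|idx| {
                let m = patterns[idx].find(rest)?;
                // A match that starts past position 0 is not a token here.
                (m.start() == 0).then(|| (idx, m.as_str()))
            })
            .max_by_key(|(_, s)| s.len())
    }

    fn main() {
        let pats: Vec<Regex> = [r"\+", r"[1-9][0-9]*"]
            .iter()
            .map(|p| Regex::new(p).unwrap())
            .collect();
        let set = RegexSet::new([r"\+", r"[1-9][0-9]*"]).unwrap();
        // Maximal munch takes "123", not "1".
        assert_eq!(longest_match(&pats, &set, "123+4"), Some((1, "123")));
    }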
+ }; - // Find the token - let mut matches = self - .regex_set - .matches(remain) - .into_iter() - .map(|idx| &self.regex_map[idx]) - .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str())) - .collect::>(); - matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); + // Find the token + let mut matches = self + .regex_set + .matches(remain) + .into_iter() + .map(|idx| &self.regex_map[idx]) + .map(|(regex, token)| (*token, regex.find(remain).unwrap().as_str())) + .collect::>(); + matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); - // Update myself - let (token, acc_s) = matches.first()?; - let range = (self.pos, self.pos + acc_s.len()); - self.pos += acc_s.len(); + // Update myself + let (token, acc_s) = matches.first()?; + let range = (pos, pos + acc_s.len()); + self.pos += acc_s.len(); - Some(Token::new(*token, &self.input, range)) + yield Token::new(*token, &input, range); + } } } diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 5f07a94..772f177 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -1,6 +1,5 @@ -use copager_cfg::token::TokenTag; +use copager_cfg::token::{TokenTag, Token}; use copager_cfg::rule::{RuleTag, RuleSet}; -use copager_lex::LexIterator; #[cfg(feature = "derive")] pub use copager_parse_derive::ParseSource; @@ -26,15 +25,15 @@ pub trait ParseSource { } } -pub trait ParseIterator<'input, T, R, Il> +pub trait ParseDriver<'input, T, R> where Self: From, T: TokenTag, R: RuleTag, - Il: LexIterator<'input, T>, { type From; - fn init(&self, lexer: Il) -> Self; - fn next(&mut self) -> Option<()>; + fn init(&self, lexer: Il) -> impl Iterator + where + Il: Iterator>; } diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index 743dcd9..6e75d66 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(gen_blocks)] + mod error; mod builder; @@ -5,36 +7,30 @@ use std::marker::PhantomData; use serde::{Serialize, Deserialize}; -use copager_lex::{LexSource, LexIterator}; -use copager_parse::{ParseSource, ParseIterator}; +use copager_lex::{LexSource, LexDriver}; +use copager_parse::{ParseSource, ParseDriver}; use copager_utils::cache::Cacheable; use builder::{LR1Configure, LRAction}; use error::ParseError; #[derive(Debug)] -pub struct LR1<'cache, 'input, Sl, Il, Sp> +pub struct LR1<'cache, 'input, Sl, Sp> where Sl: LexSource, - Il: LexIterator<'input, Sl::Tag>, Sp: ParseSource, { // LR-Table tables: &'cache LR1Configure, - // States - lexer: Option, - stack: Option>, - // Phantom Data _phantom: PhantomData<&'input ()>, } -impl<'cache, 'input, Sl, Il, Sp> Cacheable<'cache, (Sl, Sp)> for LR1<'cache, 'input, Sl, Il, Sp> +impl<'cache, 'input, Sl, Sp> Cacheable<'cache, (Sl, Sp)> for LR1<'cache, 'input, Sl, Sp> where Sl: LexSource, Sl::Tag: Serialize + for<'de> Deserialize<'de>, - Il: LexIterator<'input, Sl::Tag>, Sp: ParseSource, Sp::Tag: Serialize + for<'de> Deserialize<'de>, { @@ -49,42 +45,31 @@ where } } -impl<'cache, 'input, Sl, Il, Sp> From<&'cache LR1Configure> for LR1<'cache, 'input, Sl, Il, Sp> +impl<'cache, 'input, Sl, Sp> From<&'cache LR1Configure> for LR1<'cache, 'input, Sl, Sp> where Sl: LexSource, - Il: LexIterator<'input, Sl::Tag>, Sp: ParseSource, { fn from(tables: &'cache LR1Configure) -> Self { LR1 { tables, - lexer: None, - stack: None, _phantom: PhantomData, } } } -impl<'cache, 'input, Sl, Il, Sp> ParseIterator<'input, Sl::Tag, Sp::Tag, Il> for LR1<'cache, 'input, Sl, Il, Sp> +impl<'cache, 'input, Sl, Sp> ParseDriver<'input, Sl::Tag, Sp::Tag> for 
LR1<'cache, 'input, Sl, Sp> where Sl: LexSource, - Il: LexIterator<'input, Sl::Tag>, Sp: ParseSource, { type From = &'cache LR1Configure; - fn init(&self, lexer: Il) -> Self { - LR1 { - tables: &self.tables, - lexer: Some(lexer), - stack: Some(Vec::new()), - _phantom: PhantomData, - } - } - - fn next(&mut self) -> Option<()> { - let lexer = self.lexer.as_mut().unwrap(); - let stack = self.stack.as_mut().unwrap(); + gen fn init(&self, mut lexer: Il) -> impl Iterator + where + Il: Iterator>, + { + let mut stack = vec![]; loop { let input = lexer.next(); loop { @@ -114,16 +99,18 @@ where } (LRAction::Accept, _) => { // return builder.build(); - return Some(()); + println!("Done!"); + return; } (LRAction::None, Some(token)) => { // return Err(ParseError::new_unexpected_token(token).into()); - println!("Done!"); - return None; + println!("Unexpected: {:?}", token); + return; } (LRAction::None, None) => { // return Err(ParseError::UnexpectedEOF.into()); - return None; + println!("Unexpected EOF"); + return; } _ => unreachable!(), } From bcffcaf0fd9a34264d1af256360e5ee55844f394 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 22:39:31 +0900 Subject: [PATCH 26/55] =?UTF-8?q?[add]=20lex=5Fregex=20=E3=81=AB=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 + crates/lex/src/lib.rs | 4 +-- crates/lex_regex/Cargo.toml | 9 ++++- crates/lex_regex/src/lib.rs | 28 +++++++--------- crates/lex_regex/tests/simple.rs | 56 ++++++++++++++++++++++++++++++++ 5 files changed, 79 insertions(+), 19 deletions(-) create mode 100644 crates/lex_regex/tests/simple.rs diff --git a/Cargo.lock b/Cargo.lock index f8aa610..1f612d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,7 @@ dependencies = [ "anyhow", "copager_cfg", "copager_lex", + "copager_lex_regex", "copager_utils", "regex", "regex-macro", diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs index 939f000..3a0ad8f 100644 --- a/crates/lex/src/lib.rs +++ b/crates/lex/src/lib.rs @@ -9,12 +9,12 @@ pub trait LexSource { fn iter(&self) -> impl Iterator; } -pub trait LexDriver<'input, T> +pub trait LexDriver where Self: Sized + From, T: TokenTag, { type From; - fn init(&self, input: &'input str) -> impl Iterator>; + fn init<'input>(&self, input: &'input str) -> impl Iterator>; } diff --git a/crates/lex_regex/Cargo.toml b/crates/lex_regex/Cargo.toml index 40cc31d..e08ee94 100644 --- a/crates/lex_regex/Cargo.toml +++ b/crates/lex_regex/Cargo.toml @@ -1,7 +1,9 @@ +cargo-features = ["edition2024"] + [package] name = "copager_lex_regex" version = "0.1.1" -edition = "2021" +edition = "2024" [dependencies] anyhow = { workspace = true } @@ -11,3 +13,8 @@ regex-macro = "0.2.0" copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_utils = { path = "../utils" } + +[dev-dependencies] +copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex", features = ["derive"] } +copager_lex_regex = { path = "." 
} diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index 77a28b3..9abe16e 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -8,18 +8,13 @@ use copager_cfg::token::{TokenTag, Token}; use copager_lex::{LexSource, LexDriver}; #[derive(Debug)] -pub struct RegexLexer<'input, S: LexSource> { - // regex +pub struct RegexLexer { regex_istr: Rc, regex_set: Rc, regex_map: Rc>, - - // state - input: &'input str, - pos: usize, } -impl<'input, T, S> From for RegexLexer<'input, S> +impl From for RegexLexer where T: TokenTag, S: LexSource, @@ -38,26 +33,24 @@ where regex_istr: Rc::new(regex_istr), regex_set: Rc::new(regex_set), regex_map: Rc::new(regex_map), - input: "", - pos: 0, } } } -impl<'input, T, S> LexDriver<'input, T> for RegexLexer<'input, S> +impl LexDriver for RegexLexer where T: TokenTag, S: LexSource, { type From = S; - gen fn init(&self, input: &'input str) -> impl Iterator> { - let pos = 0; + gen fn init<'input>(&self, input: &'input str) -> Token<'input, T> { + let mut pos = 0; loop { // Skip Spaces let remain = match self.regex_istr.find(&input[pos..]) { Some(acc_s) => { - self.pos += acc_s.len(); + pos += acc_s.len(); &input[pos..] } None => &input[pos..] @@ -73,10 +66,13 @@ where .collect::>(); matches.sort_by(|(_, a), (_, b)| a.len().cmp(&b.len())); - // Update myself - let (token, acc_s) = matches.first()?; + // Update pos + let (token, acc_s) = match matches.first() { + Some(a) => a, + None => return, + }; let range = (pos, pos + acc_s.len()); - self.pos += acc_s.len(); + pos += acc_s.len(); yield Token::new(*token, &input, range); } diff --git a/crates/lex_regex/tests/simple.rs b/crates/lex_regex/tests/simple.rs new file mode 100644 index 0000000..45813f5 --- /dev/null +++ b/crates/lex_regex/tests/simple.rs @@ -0,0 +1,56 @@ +use copager_cfg::token::{TokenTag, Token}; +use copager_lex::{LexSource, LexDriver}; +use copager_lex_regex::RegexLexer; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[test] +fn simple_success() { + let lexer = RegexLexer::from(ExprToken::default()); + let mut lexer = lexer.init("1 + 2 * 3"); + assert_eq_token(lexer.next(), "1"); + assert_eq_token(lexer.next(), "+"); + assert_eq_token(lexer.next(), "2"); + assert_eq_token(lexer.next(), "*"); + assert_eq_token(lexer.next(), "3"); + assert!(lexer.next().is_none()); +} + +#[test] +#[should_panic] +fn simple_failed() { + let lexer = RegexLexer::from(ExprToken::default()); + let mut lexer = lexer.init("1 + 2 * stop 3"); + assert_eq_token(lexer.next(), "1"); + assert_eq_token(lexer.next(), "+"); + assert_eq_token(lexer.next(), "2"); + assert_eq_token(lexer.next(), "*"); + assert_eq_token(lexer.next(), "3"); + assert!(lexer.next().is_none()); +} + +fn assert_eq_token(token: Option>, s: &str) { + match token { + Some(token) => assert_eq!(token.as_str(), s), + None => panic!("unexpected eof"), + } +} From d06fbe1ac39307614364fa83f156a5e2215a8de2 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 23:14:49 +0900 Subject: [PATCH 27/55] =?UTF-8?q?[add]=20parse=5Flr1=20=E3=81=AB=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- Cargo.lock | 2 + crates/core/src/lib.rs | 15 ++++---- crates/parse/src/lib.rs | 4 +- crates/parse_lr1/Cargo.toml | 10 ++++- crates/parse_lr1/src/builder.rs | 4 +- crates/parse_lr1/src/lib.rs | 45 +++++++++------------- crates/parse_lr1/tests/simple.rs | 66 ++++++++++++++++++++++++++++++++ 7 files changed, 106 insertions(+), 40 deletions(-) create mode 100644 crates/parse_lr1/tests/simple.rs diff --git a/Cargo.lock b/Cargo.lock index 1f612d4..e332b3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,7 +146,9 @@ dependencies = [ "copager_cfg", "copager_core", "copager_lex", + "copager_lex_regex", "copager_parse", + "copager_parse_lr1", "copager_utils", "itertools", "serde", diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 863cb28..9143bca 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -5,28 +5,27 @@ use std::marker::PhantomData; use copager_lex::{LexSource, LexDriver}; use copager_parse::{ParseSource, ParseDriver}; -pub struct Processor<'input, Sl, Dl, Sp, Dp> +pub struct Processor where Sl: LexSource, - Dl: LexDriver<'input, Sl::Tag>, + Dl: LexDriver, Sp: ParseSource, - Dp: ParseDriver<'input, Sl::Tag, Sp::Tag>, + Dp: ParseDriver, { _phantom_sl: PhantomData, _phantom_il: PhantomData
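The signature changes in this patch follow one theme: the `'input` lifetime moves off the driver types and onto the methods that actually borrow the input, so a driver can be built once and reused across any number of inputs. A minimal sketch of that shape with stand-in types (not the real copager traits; `impl Trait` in trait methods needs Rust 1.75+):

    // A token borrowing the slice it was cut from.
    struct Tok<'a>(&'a str);

    // The lifetime is late-bound on `run`, not a parameter of the trait.
    trait LexDriverSketch {
        fn run<'input>(&self, input: &'input str) -> impl Iterator<Item = Tok<'input>>;
    }

    struct SplitLexer;

    impl LexDriverSketch for SplitLexer {
        fn run<'input>(&self, input: &'input str) -> impl Iterator<Item = Tok<'input>> {
            input.split_whitespace().map(Tok)
        }
    }

    fn main() {
        let lexer = SplitLexer; // constructed once ...
        for line in ["1 + 2", "3 * 4 / 5"] {
            let count = lexer.run(line).count(); // ... reused per input
            println!("{line:?}: {count} tokens");
        }
    }

The payoff is that the driver no longer drags a borrow of the input around in its own type, which is what lets the later patches cache and restore drivers freely.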
, _phantom_sp: PhantomData, _phantom_ip: PhantomData, - _phantom_input: PhantomData<&'input ()>, } -impl<'input, 'cache, Sl, Dl, Sp, Dp> Processor<'input, Sl, Dl, Sp, Dp> +impl<'cache, Sl, Dl, Sp, Dp> Processor where Sl: LexSource, - Dl: LexDriver<'input, Sl::Tag, From = Sl>, + Dl: LexDriver, Sp: ParseSource, - Dp: ParseDriver<'input, Sl::Tag, Sp::Tag, From = Sp>, + Dp: ParseDriver, { - pub fn process(input: &'input str) + pub fn process<'input>(input: &'input str) where Sl: Default, Sp: Default, diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 772f177..4d39fe2 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -25,7 +25,7 @@ pub trait ParseSource { } } -pub trait ParseDriver<'input, T, R> +pub trait ParseDriver where Self: From, T: TokenTag, @@ -33,7 +33,7 @@ where { type From; - fn init(&self, lexer: Il) -> impl Iterator + fn init<'input, Il>(&self, lexer: Il) -> impl Iterator where Il: Iterator>; } diff --git a/crates/parse_lr1/Cargo.toml b/crates/parse_lr1/Cargo.toml index 14df8fa..dee2c56 100644 --- a/crates/parse_lr1/Cargo.toml +++ b/crates/parse_lr1/Cargo.toml @@ -1,7 +1,9 @@ +cargo-features = ["edition2024"] + [package] name = "copager_parse_lr1" version = "0.1.1" -edition = "2021" +edition = "2024" [dependencies] anyhow = { workspace = true } @@ -13,3 +15,9 @@ copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } copager_utils = { path = "../utils" } + +[dev-dependencies] +copager_lex = { path = "../lex", features = ["derive"] } +copager_lex_regex = { path = "../lex_regex" } +copager_parse = { path = "../parse", features = ["derive"] } +copager_parse_lr1 = { path = "../parse_lr1" } diff --git a/crates/parse_lr1/src/builder.rs b/crates/parse_lr1/src/builder.rs index de05ad1..5978ed8 100644 --- a/crates/parse_lr1/src/builder.rs +++ b/crates/parse_lr1/src/builder.rs @@ -55,7 +55,7 @@ where let lr_items = lr_items.expand_closure(&ruleset, &first_set); // 3. Generate a DFA - let dfa = LRItemDFA::gen(lr_items, &ruleset, &first_set); + let dfa = LRItemDFA::r#gen(lr_items, &ruleset, &first_set); // 4. 
Initialize tables let mut idx = 0; @@ -147,7 +147,7 @@ struct LRItemDFA<'a, T: TokenTag> ( ); impl<'a, T: TokenTag> LRItemDFA<'a, T> { - fn gen( + fn r#gen( init_set: LRItemSet<'a, T>, ruleset: &'a RuleSet, first_set: &HashMap<&'a RuleElem, Vec<&'a RuleElem>>, diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index 6e75d66..91c6b29 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -3,11 +3,12 @@ mod error; mod builder; -use std::marker::PhantomData; +use std::collections::HashMap; use serde::{Serialize, Deserialize}; -use copager_lex::{LexSource, LexDriver}; +use copager_cfg::token::Token; +use copager_lex::LexSource; use copager_parse::{ParseSource, ParseDriver}; use copager_utils::cache::Cacheable; @@ -15,19 +16,15 @@ use builder::{LR1Configure, LRAction}; use error::ParseError; #[derive(Debug)] -pub struct LR1<'cache, 'input, Sl, Sp> +pub struct LR1<'cache, Sl, Sp> where Sl: LexSource, Sp: ParseSource, { - // LR-Table tables: &'cache LR1Configure, - - // Phantom Data - _phantom: PhantomData<&'input ()>, } -impl<'cache, 'input, Sl, Sp> Cacheable<'cache, (Sl, Sp)> for LR1<'cache, 'input, Sl, Sp> +impl<'cache, Sl, Sp> Cacheable<'cache, (Sl, Sp)> for LR1<'cache, Sl, Sp> where Sl: LexSource, Sl::Tag: Serialize + for<'de> Deserialize<'de>, @@ -45,50 +42,44 @@ where } } -impl<'cache, 'input, Sl, Sp> From<&'cache LR1Configure> for LR1<'cache, 'input, Sl, Sp> +impl<'cache, Sl, Sp> From<&'cache LR1Configure> for LR1<'cache, Sl, Sp> where Sl: LexSource, Sp: ParseSource, { fn from(tables: &'cache LR1Configure) -> Self { - LR1 { - tables, - _phantom: PhantomData, - } + LR1 { tables } } } -impl<'cache, 'input, Sl, Sp> ParseDriver<'input, Sl::Tag, Sp::Tag> for LR1<'cache, 'input, Sl, Sp> +impl<'cache, Sl, Sp> ParseDriver for LR1<'cache, Sl, Sp> where Sl: LexSource, Sp: ParseSource, { type From = &'cache LR1Configure; - gen fn init(&self, mut lexer: Il) -> impl Iterator + gen fn init<'input, Il>(&self, mut lexer: Il) where Il: Iterator>, { - let mut stack = vec![]; + let mut stack = vec![0]; loop { - let input = lexer.next(); + let token = lexer.next(); loop { let top = stack[stack.len() - 1]; - let action = match input { - Some(token) => ( - self.tables.action_table[top].get(&token.kind).unwrap(), - Some(token), - ), - None => ( - &self.tables.eof_action_table[top], - None - ), + let action = match token { + Some(token) => { + let local_action_table: &HashMap<_, _> = &self.tables.action_table[top]; + (local_action_table.get(&token.kind).unwrap(), Some(token)) + }, + None => (&self.tables.eof_action_table[top], None), }; match action { (LRAction::Shift(new_state), Some(token)) => { stack.push(*new_state); // builder.push(token); - println!("Shift: {:?}", token); + println!("Shift: {}", token.as_str()); break; } (LRAction::Reduce(tag, goto, elems_cnt), _) => { diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs new file mode 100644 index 0000000..edf9996 --- /dev/null +++ b/crates/parse_lr1/tests/simple.rs @@ -0,0 +1,66 @@ +use serde::{Serialize, Deserialize}; + +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::{LexSource, LexDriver}; +use copager_lex_regex::RegexLexer; +use copager_parse::{ParseSource, ParseDriver}; +use copager_parse_lr1::LR1; +use copager_utils::cache::Cacheable; + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + LexSource, Serialize, Deserialize +)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + 
Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + ParseSource, Serialize, Deserialize +)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +#[test] +fn simple_success() -> anyhow::Result<()> { + let lexer = RegexLexer::from(ExprToken::default()); + let lexer = lexer.init("1 + 2 * 3"); + + let parser_conf = LR1::new((ExprToken::default(), ExprRule::default()))?; + let parser = LR1::from(&parser_conf); + let parser = parser.init(lexer); + + assert_eq!(parser.count(), 0); + + Ok(()) +} From 9288fd12fa42e17ae76d2bb90204b5c3190ca0cd Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Thu, 5 Sep 2024 23:19:40 +0900 Subject: [PATCH 28/55] [change] {Lex, Parse}Driver.init -> .run --- crates/core/src/lib.rs | 2 +- crates/lex/src/lib.rs | 2 +- crates/lex_regex/src/lib.rs | 2 +- crates/lex_regex/tests/simple.rs | 4 ++-- crates/parse/src/lib.rs | 2 +- crates/parse_lr1/src/lib.rs | 2 +- crates/parse_lr1/tests/simple.rs | 4 ++-- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 9143bca..9d95d95 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -33,7 +33,7 @@ where let lexer = Dl::from(Sl::default()); let parser = Dp::from(Sp::default()); loop { - for _ in parser.init(lexer.init(input)) { + for _ in parser.run(lexer.run(input)) { println!("-----"); } } diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs index 3a0ad8f..ff6e108 100644 --- a/crates/lex/src/lib.rs +++ b/crates/lex/src/lib.rs @@ -16,5 +16,5 @@ where { type From; - fn init<'input>(&self, input: &'input str) -> impl Iterator>; + fn run<'input>(&self, input: &'input str) -> impl Iterator>; } diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index 9abe16e..919358b 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -44,7 +44,7 @@ where { type From = S; - gen fn init<'input>(&self, input: &'input str) -> Token<'input, T> { + gen fn run<'input>(&self, input: &'input str) -> Token<'input, T> { let mut pos = 0; loop { // Skip Spaces diff --git a/crates/lex_regex/tests/simple.rs b/crates/lex_regex/tests/simple.rs index 45813f5..63c30ba 100644 --- a/crates/lex_regex/tests/simple.rs +++ b/crates/lex_regex/tests/simple.rs @@ -26,7 +26,7 @@ enum ExprToken { #[test] fn simple_success() { let lexer = RegexLexer::from(ExprToken::default()); - let mut lexer = lexer.init("1 + 2 * 3"); + let mut lexer = lexer.run("1 + 2 * 3"); assert_eq_token(lexer.next(), "1"); assert_eq_token(lexer.next(), "+"); assert_eq_token(lexer.next(), "2"); @@ -39,7 +39,7 @@ fn simple_success() { #[should_panic] fn simple_failed() { let lexer = RegexLexer::from(ExprToken::default()); - let mut lexer = lexer.init("1 + 2 * stop 3"); + let mut lexer = lexer.run("1 + 2 * stop 3"); assert_eq_token(lexer.next(), "1"); assert_eq_token(lexer.next(), "+"); assert_eq_token(lexer.next(), "2"); diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 4d39fe2..467e812 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -33,7 +33,7 @@ where { type From; - fn init<'input, Il>(&self, 
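The `init` to `run` rename in this patch is cosmetic, but it makes the call shape explicit: each stage's `run` returns an iterator, and stages compose by feeding one iterator into the next, as in `parser.run(lexer.run(input))`. The same composition shape with plain functions, as a sketch:

    // Stage 1: cut the input into (here, numeric) tokens.
    fn lex(input: &str) -> impl Iterator<Item = i64> + '_ {
        input.split_whitespace().filter_map(|word| word.parse().ok())
    }

    // Stage 2: consume the token stream and produce a result.
    fn parse(tokens: impl Iterator<Item = i64>) -> i64 {
        tokens.sum()
    }

    fn main() {
        // Same shape as `parser.run(lexer.run(input))`.
        assert_eq!(parse(lex("10 20 30")), 60);
    }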
lexer: Il) -> impl Iterator + fn run<'input, Il>(&self, lexer: Il) -> impl Iterator where Il: Iterator>; } diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index 91c6b29..b21d8b5 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -59,7 +59,7 @@ where { type From = &'cache LR1Configure; - gen fn init<'input, Il>(&self, mut lexer: Il) + gen fn run<'input, Il>(&self, mut lexer: Il) where Il: Iterator>, { diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs index edf9996..214278b 100644 --- a/crates/parse_lr1/tests/simple.rs +++ b/crates/parse_lr1/tests/simple.rs @@ -54,11 +54,11 @@ enum ExprRule { #[test] fn simple_success() -> anyhow::Result<()> { let lexer = RegexLexer::from(ExprToken::default()); - let lexer = lexer.init("1 + 2 * 3"); + let lexer = lexer.run("1 + 2 * 3"); let parser_conf = LR1::new((ExprToken::default(), ExprRule::default()))?; let parser = LR1::from(&parser_conf); - let parser = parser.init(lexer); + let parser = parser.run(lexer); assert_eq!(parser.count(), 0); From 3e3a0b9efcff2f28905f6bc24b65afd6b436157f Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 16:41:26 +0900 Subject: [PATCH 29/55] =?UTF-8?q?[add]=20copager=5Fcore=20=E3=82=AF?= =?UTF-8?q?=E3=83=AC=E3=83=BC=E3=83=88=E6=95=B4=E5=82=99(=E4=BB=AE)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 21 +++ crates/core/Cargo.toml | 9 ++ crates/core/src/lib.rs | 218 ++++++++++++++++++++++++--- crates/core/tests/prebuild.rs | 85 +++++++++++ crates/core/tests/simple.rs | 68 +++++++++ crates/core/tests/simple_multiple.rs | 118 +++++++++++++++ crates/parse_lr1/src/lib.rs | 16 +- crates/parse_lr1/tests/simple.rs | 2 +- crates/utils/src/cache.rs | 6 +- 9 files changed, 514 insertions(+), 29 deletions(-) create mode 100644 crates/core/tests/prebuild.rs create mode 100644 crates/core/tests/simple.rs create mode 100644 crates/core/tests/simple_multiple.rs diff --git a/Cargo.lock b/Cargo.lock index e332b3f..62b6862 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -49,9 +49,14 @@ version = "0.1.1" dependencies = [ "anyhow", "copager_cfg", + "copager_core", "copager_lex", + "copager_lex_regex", "copager_parse", + "copager_parse_lr1", "copager_utils", + "serde", + "serde_cbor", "thiserror", ] @@ -170,6 +175,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "half" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" + [[package]] name = "itertools" version = "0.12.1" @@ -269,6 +280,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.202" diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index b379073..284ceed 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -6,7 +6,16 @@ edition = "2021" [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } +serde = { workspace = true } +serde_cbor = "0.11.2" copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } copager_utils = { 
path = "../utils" } + +[dev-dependencies] +copager_core = { path = "." } +copager_lex = { path = "../lex", features = ["derive"]} +copager_lex_regex = { path = "../lex_regex" } +copager_parse = { path = "../parse", features = ["derive"] } +copager_parse_lr1 = { path = "../parse_lr1" } diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 9d95d95..eaa1f2c 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -2,40 +2,224 @@ pub mod error; use std::marker::PhantomData; +use serde::{Serialize, Deserialize}; +use serde_cbor::ser::to_vec_packed; +use serde_cbor::de::from_slice; + use copager_lex::{LexSource, LexDriver}; use copager_parse::{ParseSource, ParseDriver}; +use copager_utils::cache::Cacheable; + +pub trait GrammarDesign { + type Lex: LexSource; + type Parse: ParseSource<::Tag>; +} -pub struct Processor +pub struct Grammar where Sl: LexSource, - Dl: LexDriver, Sp: ParseSource, - Dp: ParseDriver, { _phantom_sl: PhantomData, - _phantom_il: PhantomData
, _phantom_sp: PhantomData, - _phantom_ip: PhantomData, } -impl<'cache, Sl, Dl, Sp, Dp> Processor +impl GrammarDesign for Grammar where Sl: LexSource, - Dl: LexDriver, Sp: ParseSource, - Dp: ParseDriver, { - pub fn process<'input>(input: &'input str) + type Lex = Sl; + type Parse = Sp; +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Processor +where + G: GrammarDesign, + Dl: LexDriver<::Tag>, + Dp: ParseDriver< + ::Tag, + ::Tag>>::Tag, + >, +{ + // Cache + cache_lex: Option>, + cache_parse: Option>, + + // Driver + #[serde(skip, default="Option::default")] + lexer: Option
, + #[serde(skip, default="Option::default")] + parser: Option, + + // Phantom + #[serde(skip)] + _phantom_g: PhantomData, + #[serde(skip)] + _phantom_dl: PhantomData
, + #[serde(skip)] + _phantom_dp: PhantomData, +} + +impl Processor +where + G: GrammarDesign, + Dl: LexDriver<::Tag>, + Dp: ParseDriver< + ::Tag, + ::Tag>>::Tag, + >, +{ + pub fn new() -> Self { + Processor { + cache_lex: None, + cache_parse: None, + lexer: None, + parser: None, + _phantom_g: PhantomData, + _phantom_dl: PhantomData, + _phantom_dp: PhantomData, + } + } + + pub fn prebuild_lexer(self) -> anyhow::Result where - Sl: Default, - Sp: Default, + G::Lex: Default, + Dl: Cacheable, { - let lexer = Dl::from(Sl::default()); - let parser = Dp::from(Sp::default()); - loop { - for _ in parser.run(lexer.run(input)) { - println!("-----"); - } + self.prebuild_lexer_by(G::Lex::default()) + } + + pub fn prebuild_lexer_by(mut self, source: G::Lex) -> anyhow::Result + where + Dl: Cacheable, + { + assert!(self.cache_lex.is_none()); + + let cache_lex = Dl::new(source)?; + self.cache_lex = Some(to_vec_packed(&cache_lex)?); + + Ok(self) + } + + pub fn prebuild_parser(self) -> anyhow::Result + where + G::Lex: Default, + G::Parse: Default, + Dp: Cacheable<(G::Lex, G::Parse)>, + { + self.prebuild_parser_by((G::Lex::default(), G::Parse::default())) + } + + pub fn prebuild_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result + where + G::Lex: Default, + G::Parse: Default, + Dp: Cacheable<(G::Lex, G::Parse)>, + { + assert!(self.cache_parse.is_none()); + + let cache_parse = Dp::new(source)?; + self.cache_parse = Some(to_vec_packed(&cache_parse)?); + + Ok(self) + } + + pub fn build_lexer(self) -> Self + where + G::Lex: Default, + Dl: LexDriver<::Tag, From = G::Lex>, + { + self.build_lexer_by(G::Lex::default()) + } + + pub fn build_lexer_by(mut self, source: G::Lex) -> Self + where + G::Lex: Default, + Dl: LexDriver<::Tag, From = G::Lex>, + { + assert!(self.cache_lex.is_none()); + + let lexer = Dl::from(source); + self.lexer = Some(lexer); + + self + } + + pub fn build_lexer_by_cache(mut self) -> Self + where + G::Lex: Default, + Dl: Cacheable, + { + assert!(self.lexer.is_some()); + + let cache_lex = self.cache_lex.as_ref().unwrap(); + let cache_lex = from_slice(cache_lex); + let lexer = Dl::restore(cache_lex.unwrap()); + self.lexer = Some(lexer); + + self + } + + pub fn build_parser(self) -> Self + where + G::Lex: Default, + G::Parse: Default, + Dp: ParseDriver< + ::Tag, + ::Tag>>::Tag, + From = (G::Lex, G::Parse), + >, + { + self.build_parser_by((G::Lex::default(), G::Parse::default())) + } + + pub fn build_parser_by(mut self, source: (G::Lex, G::Parse)) -> Self + where + G::Lex: Default, + G::Parse: Default, + Dp: ParseDriver< + ::Tag, + ::Tag>>::Tag, + From = (G::Lex, G::Parse), + >, + { + assert!(self.cache_parse.is_none()); + + let parser = Dp::from(source); + self.parser = Some(parser); + + self + } + + pub fn build_parser_by_cache(mut self) -> Self + where + G::Lex: Default, + G::Parse: Default, + Dp: Cacheable<(G::Lex, G::Parse)>, + { + assert!(self.parser.is_none()); + + let cache_parse = self.cache_parse.as_ref().unwrap(); + let cache_parse = from_slice(cache_parse); + let parser = Dp::restore(cache_parse.unwrap()); + self.parser = Some(parser); + + self + } + + pub fn process<'input>(&self, input: &'input str) -> anyhow::Result<()> { + assert!(self.lexer.is_some()); + assert!(self.parser.is_some()); + + let lexer = self.lexer.as_ref().unwrap(); + let parser = self.parser.as_ref().unwrap(); + + for result in parser.run(lexer.run(input)) { + println!("{:?}", result); } + + Ok(()) } } diff --git a/crates/core/tests/prebuild.rs b/crates/core/tests/prebuild.rs new file mode 100644 index 
0000000..f8d5f7a --- /dev/null +++ b/crates/core/tests/prebuild.rs @@ -0,0 +1,85 @@ +use serde::{Serialize, Deserialize}; +use serde_cbor::ser::to_vec_packed; +use serde_cbor::de::from_slice; + +use copager_core::{Grammar, Processor}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::LexSource; +use copager_lex_regex::RegexLexer; +use copager_parse::ParseSource; +use copager_parse_lr1::LR1; + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + LexSource, Serialize, Deserialize +)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + ParseSource, Serialize, Deserialize +)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyGrammar = Grammar; +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyProcessor = Processor; + +#[test] +fn prebuild() -> anyhow::Result<()> { + // in build.rs + let prebuiled_processor = build_rs()?; + let serialized = to_vec_packed(&prebuiled_processor)?; + + // in main.rs + let deserialized: MyProcessor = from_slice(&serialized)?; + main_rs(deserialized)?; + + Ok(()) +} + +fn build_rs() -> anyhow::Result { + MyProcessor::new().prebuild_parser() +} + +fn main_rs(processor: MyProcessor) -> anyhow::Result<()> { + processor + .build_lexer() + .build_parser_by_cache() + .process("1 + 2 * 3")?; + + Ok(()) +} diff --git a/crates/core/tests/simple.rs b/crates/core/tests/simple.rs new file mode 100644 index 0000000..369d456 --- /dev/null +++ b/crates/core/tests/simple.rs @@ -0,0 +1,68 @@ +use serde::{Serialize, Deserialize}; + +use copager_core::{Grammar, Processor}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::LexSource; +use copager_lex_regex::RegexLexer; +use copager_parse::ParseSource; +use copager_parse_lr1::LR1; + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + LexSource, Serialize, Deserialize +)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + ParseSource, Serialize, Deserialize +)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyGrammar = Grammar; +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyProcessor = Processor; + +#[test] +fn simple_success() -> anyhow::Result<()> { + MyProcessor::new() + .build_lexer() + .prebuild_parser()? 
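The prebuild.rs test above splits table construction from table use: the "build.rs" phase builds the LR(1) tables once and serializes them with `serde_cbor::ser::to_vec_packed`, and the "main.rs" phase restores them with `serde_cbor::de::from_slice` instead of recomputing. The round-trip in isolation, with a stand-in table type in place of the real `LR1Configure`:

    use serde::{Deserialize, Serialize};

    // Stand-in for the expensive-to-build parser tables.
    #[derive(Debug, PartialEq, Serialize, Deserialize)]
    struct Tables {
        states: Vec<u32>,
    }

    fn main() -> anyhow::Result<()> {
        // "build.rs" side: construct once, serialize compactly.
        let built = Tables { states: (0u32..1000).collect() };
        let bytes = serde_cbor::ser::to_vec_packed(&built)?;

        // "main.rs" side: restore from bytes, skipping construction.
        let restored: Tables = serde_cbor::de::from_slice(&bytes)?;
        assert_eq!(built, restored);
        Ok(())
    }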
+ .build_parser_by_cache() + .process("1 + 2 * 3")?; + + Ok(()) +} diff --git a/crates/core/tests/simple_multiple.rs b/crates/core/tests/simple_multiple.rs new file mode 100644 index 0000000..9e57221 --- /dev/null +++ b/crates/core/tests/simple_multiple.rs @@ -0,0 +1,118 @@ +use serde::{Serialize, Deserialize}; + +use copager_core::{Grammar, Processor}; +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::LexSource; +use copager_lex_regex::RegexLexer; +use copager_parse::ParseSource; +use copager_parse_lr1::LR1; + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + LexSource, Serialize, Deserialize +)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive( + Debug, Default, Copy, Clone, Hash, PartialEq, Eq, + ParseSource, Serialize, Deserialize +)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyGrammar = Grammar; +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyProcessor = Processor; + +const OK_INPUTS: [&str; 7] = [ + "1 + 2", + "1 + 2 * 3", + "1 + 2 * 3 / 4", + "1 + 2 * (3 / 4)", + "1 + 2 * (3 / 4) - 5", + "1 + 2 * (3 / 4) - 5 * 6", + "(1 + 2) * ((3 / 4) - 5 * 6 / 7)", +]; + +const ERR_INPUTS: [&str; 7] = [ + "1 +", + "1 + 2 *", + "1 + 2 * 3 /", + "1 + 2 * (3 /", + "1 + 2 * (3 / 4", + "1 + 2 * (3 / 4) -", + "(1 + 2) * ((3 / 4) - 5 * 6 /", +]; + +#[test] +fn simple_multiple_only_success() { + let processor = gen_processor(); + for input in OK_INPUTS { + assert!(processor.process(input).is_ok()); + } +} + +#[test] +fn simple_multiple_only_failure() { + let processor = gen_processor(); + for input in ERR_INPUTS { + assert!(processor.process(input).is_err()); + } +} + +#[test] +fn simple_multiple_mix_success_and_failure() { + let mixed_testcases = OK_INPUTS + .iter() + .zip(ERR_INPUTS.iter()) + .flat_map(|(ok, err)| vec![(true, ok), (false, err)]); + + let processor = gen_processor(); + for (is_ok, input) in mixed_testcases { + if is_ok { + assert!(processor.process(input).is_ok()); + } else { + assert!(processor.process(input).is_err()); + } + } +} + +fn gen_processor() -> MyProcessor { + MyProcessor::new() + .build_lexer() + .prebuild_parser() + .unwrap() + .build_parser_by_cache() +} diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index b21d8b5..19f5837 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -16,15 +16,15 @@ use builder::{LR1Configure, LRAction}; use error::ParseError; #[derive(Debug)] -pub struct LR1<'cache, Sl, Sp> +pub struct LR1 where Sl: LexSource, Sp: ParseSource, { - tables: &'cache LR1Configure, + tables: LR1Configure, } -impl<'cache, Sl, Sp> Cacheable<'cache, (Sl, Sp)> for LR1<'cache, Sl, Sp> +impl Cacheable<(Sl, Sp)> for LR1 where Sl: LexSource, Sl::Tag: Serialize + for<'de> Deserialize<'de>, @@ -37,27 +37,27 @@ where Ok(LR1Configure::new(&source_l, &source_p)?) 
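The `Cacheable` rework just below is what makes the prebuild story hold together: `Cache` is now bound by `for<'de> Deserialize<'de>` (deserializable from any buffer, producing an owned value) instead of `Deserialize<'cache>`, and `restore` takes the cache by value, so no lifetime ties the restored driver to the serialized bytes. The trait shape in isolation, with hypothetical stand-in types:

    use serde::{Deserialize, Serialize};

    // The shape cache.rs ends up with: an owned, self-contained cache.
    trait CacheableSketch<F>: Sized {
        type Cache: Serialize + for<'de> Deserialize<'de>;
        fn new(from: F) -> anyhow::Result<Self::Cache>;
        fn restore(cache: Self::Cache) -> Self;
    }

    #[derive(Serialize, Deserialize)]
    struct Table(Vec<u8>);

    struct Driver { table: Table }

    impl CacheableSketch<usize> for Driver {
        type Cache = Table;
        fn new(size: usize) -> anyhow::Result<Table> {
            Ok(Table(vec![0u8; size]))
        }
        fn restore(table: Table) -> Self {
            Driver { table }
        }
    }

    fn main() -> anyhow::Result<()> {
        let cache = <Driver as CacheableSketch<usize>>::new(4)?;
        let driver = Driver::restore(cache);
        assert_eq!(driver.table.0.len(), 4);
        Ok(())
    }

The trade is zero-copy borrowing for a much simpler API; since the tables are deserialized once at startup, the extra copy is cheap.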
} - fn restore(tables: &'cache Self::Cache) -> Self { + fn restore(tables: Self::Cache) -> Self { Self::from(tables) } } -impl<'cache, Sl, Sp> From<&'cache LR1Configure> for LR1<'cache, Sl, Sp> +impl From> for LR1 where Sl: LexSource, Sp: ParseSource, { - fn from(tables: &'cache LR1Configure) -> Self { + fn from(tables: LR1Configure) -> Self { LR1 { tables } } } -impl<'cache, Sl, Sp> ParseDriver for LR1<'cache, Sl, Sp> +impl ParseDriver for LR1 where Sl: LexSource, Sp: ParseSource, { - type From = &'cache LR1Configure; + type From = LR1Configure; gen fn run<'input, Il>(&self, mut lexer: Il) where diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs index 214278b..42974b9 100644 --- a/crates/parse_lr1/tests/simple.rs +++ b/crates/parse_lr1/tests/simple.rs @@ -57,7 +57,7 @@ fn simple_success() -> anyhow::Result<()> { let lexer = lexer.run("1 + 2 * 3"); let parser_conf = LR1::new((ExprToken::default(), ExprRule::default()))?; - let parser = LR1::from(&parser_conf); + let parser = LR1::from(parser_conf); let parser = parser.run(lexer); assert_eq!(parser.count(), 0); diff --git a/crates/utils/src/cache.rs b/crates/utils/src/cache.rs index e8b3374..11cc551 100644 --- a/crates/utils/src/cache.rs +++ b/crates/utils/src/cache.rs @@ -1,11 +1,11 @@ use serde::{Serialize, Deserialize}; -pub trait Cacheable<'cache, F> +pub trait Cacheable where Self: Sized, { - type Cache: Serialize + Deserialize<'cache>; + type Cache: Serialize + for<'de> Deserialize<'de>; fn new(from: F) -> anyhow::Result; - fn restore(cache: &'cache Self::Cache) -> Self; + fn restore(cache: Self::Cache) -> Self; } From b96ea204fdddb81b4f67b54c3c82033824fc1471 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 17:10:11 +0900 Subject: [PATCH 30/55] =?UTF-8?q?[change]=20LexDriver=20=E3=81=AB=E8=BC=89?= =?UTF-8?q?=E3=81=9B=E3=82=8B=20From=20=E3=81=AE=E4=BB=95=E6=A7=98?= =?UTF-8?q?=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/core/src/lib.rs | 14 ++++++-------- crates/core/tests/prebuild.rs | 2 +- crates/core/tests/simple.rs | 2 +- crates/core/tests/simple_multiple.rs | 1 + crates/lex/src/lib.rs | 11 +++++------ crates/lex_regex/src/lib.rs | 28 ++++++++-------------------- crates/lex_regex/tests/simple.rs | 8 ++++++-- crates/parse_lr1/tests/simple.rs | 8 +++++--- 8 files changed, 33 insertions(+), 41 deletions(-) diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index eaa1f2c..1ffea75 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -37,7 +37,7 @@ where pub struct Processor where G: GrammarDesign, - Dl: LexDriver<::Tag>, + Dl: LexDriver, Dp: ParseDriver< ::Tag, ::Tag>>::Tag, @@ -65,7 +65,7 @@ where impl Processor where G: GrammarDesign, - Dl: LexDriver<::Tag>, + Dl: LexDriver, Dp: ParseDriver< ::Tag, ::Tag>>::Tag, @@ -126,25 +126,23 @@ where Ok(self) } - pub fn build_lexer(self) -> Self + pub fn build_lexer(self) -> anyhow::Result where G::Lex: Default, - Dl: LexDriver<::Tag, From = G::Lex>, { self.build_lexer_by(G::Lex::default()) } - pub fn build_lexer_by(mut self, source: G::Lex) -> Self + pub fn build_lexer_by(mut self, source: G::Lex) -> anyhow::Result where G::Lex: Default, - Dl: LexDriver<::Tag, From = G::Lex>, { assert!(self.cache_lex.is_none()); - let lexer = Dl::from(source); + let lexer = Dl::try_from(source)?; self.lexer = Some(lexer); - self + Ok(self) } pub fn build_lexer_by_cache(mut self) -> Self diff --git a/crates/core/tests/prebuild.rs 
b/crates/core/tests/prebuild.rs index f8d5f7a..2a15fc1 100644 --- a/crates/core/tests/prebuild.rs +++ b/crates/core/tests/prebuild.rs @@ -77,7 +77,7 @@ fn build_rs() -> anyhow::Result { fn main_rs(processor: MyProcessor) -> anyhow::Result<()> { processor - .build_lexer() + .build_lexer()? .build_parser_by_cache() .process("1 + 2 * 3")?; diff --git a/crates/core/tests/simple.rs b/crates/core/tests/simple.rs index 369d456..d449248 100644 --- a/crates/core/tests/simple.rs +++ b/crates/core/tests/simple.rs @@ -59,7 +59,7 @@ type MyProcessor = Processor; #[test] fn simple_success() -> anyhow::Result<()> { MyProcessor::new() - .build_lexer() + .build_lexer()? .prebuild_parser()? .build_parser_by_cache() .process("1 + 2 * 3")?; diff --git a/crates/core/tests/simple_multiple.rs b/crates/core/tests/simple_multiple.rs index 9e57221..01a0bfc 100644 --- a/crates/core/tests/simple_multiple.rs +++ b/crates/core/tests/simple_multiple.rs @@ -112,6 +112,7 @@ fn simple_multiple_mix_success_and_failure() { fn gen_processor() -> MyProcessor { MyProcessor::new() .build_lexer() + .unwrap() .prebuild_parser() .unwrap() .build_parser_by_cache() diff --git a/crates/lex/src/lib.rs b/crates/lex/src/lib.rs index ff6e108..76bd97f 100644 --- a/crates/lex/src/lib.rs +++ b/crates/lex/src/lib.rs @@ -9,12 +9,11 @@ pub trait LexSource { fn iter(&self) -> impl Iterator; } -pub trait LexDriver +pub trait LexDriver where - Self: Sized + From, - T: TokenTag, + Self: Sized, + S: LexSource, { - type From; - - fn run<'input>(&self, input: &'input str) -> impl Iterator>; + fn try_from(source: S) -> anyhow::Result; + fn run<'input>(&self, input: &'input str) -> impl Iterator>; } diff --git a/crates/lex_regex/src/lib.rs b/crates/lex_regex/src/lib.rs index 919358b..2e22254 100644 --- a/crates/lex_regex/src/lib.rs +++ b/crates/lex_regex/src/lib.rs @@ -14,37 +14,25 @@ pub struct RegexLexer { regex_map: Rc>, } -impl From for RegexLexer -where - T: TokenTag, - S: LexSource, -{ - fn from(source: S) -> Self { // TODO: -> try_from - let regex_istr = Regex::new(source.ignore_token()).unwrap(); +impl LexDriver for RegexLexer { + fn try_from(source: S) -> anyhow::Result { + let regex_istr = Regex::new(source.ignore_token())?; let regex_set = source.iter() .map(|token| token.as_str()) .collect::>(); - let regex_set = RegexSet::new(regex_set).unwrap(); + let regex_set = RegexSet::new(regex_set)?; let regex_map = source.iter() .map(|token| Ok((Regex::new(token.as_str())?, token))) - .collect::>>().unwrap(); + .collect::>>()?; - RegexLexer { + Ok(RegexLexer { regex_istr: Rc::new(regex_istr), regex_set: Rc::new(regex_set), regex_map: Rc::new(regex_map), - } + }) } -} - -impl LexDriver for RegexLexer -where - T: TokenTag, - S: LexSource, -{ - type From = S; - gen fn run<'input>(&self, input: &'input str) -> Token<'input, T> { + gen fn run<'input>(&self, input: &'input str) -> Token<'input, S::Tag> { let mut pos = 0; loop { // Skip Spaces diff --git a/crates/lex_regex/tests/simple.rs b/crates/lex_regex/tests/simple.rs index 63c30ba..6a15694 100644 --- a/crates/lex_regex/tests/simple.rs +++ b/crates/lex_regex/tests/simple.rs @@ -23,9 +23,12 @@ enum ExprToken { _Whitespace, } +type MyLexer = RegexLexer; + #[test] fn simple_success() { - let lexer = RegexLexer::from(ExprToken::default()); + let source = ExprToken::default(); + let lexer = >::try_from(source).unwrap(); let mut lexer = lexer.run("1 + 2 * 3"); assert_eq_token(lexer.next(), "1"); assert_eq_token(lexer.next(), "+"); @@ -38,7 +41,8 @@ fn simple_success() { #[test] #[should_panic] fn 
simple_failed() { - let lexer = RegexLexer::from(ExprToken::default()); + let source = ExprToken::default(); + let lexer = >::try_from(source).unwrap(); let mut lexer = lexer.run("1 + 2 * stop 3"); assert_eq_token(lexer.next(), "1"); assert_eq_token(lexer.next(), "+"); diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs index 42974b9..f1fbb3d 100644 --- a/crates/parse_lr1/tests/simple.rs +++ b/crates/parse_lr1/tests/simple.rs @@ -51,14 +51,16 @@ enum ExprRule { Num, } +type MyLexer = RegexLexer; + #[test] fn simple_success() -> anyhow::Result<()> { - let lexer = RegexLexer::from(ExprToken::default()); - let lexer = lexer.run("1 + 2 * 3"); + let source = ExprToken::default(); + let lexer = >::try_from(source).unwrap(); let parser_conf = LR1::new((ExprToken::default(), ExprRule::default()))?; let parser = LR1::from(parser_conf); - let parser = parser.run(lexer); + let parser = parser.run(lexer.run("1 + 2 * 3")); assert_eq!(parser.count(), 0); From 83df893fc27d805396ef6bfbde31835b320453be Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 17:24:26 +0900 Subject: [PATCH 31/55] =?UTF-8?q?[change]=20ParseDriver=20=E3=81=AB?= =?UTF-8?q?=E8=BC=89=E3=81=9B=E3=82=8B=20From=20=E3=81=AE=E4=BB=95?= =?UTF-8?q?=E6=A7=98=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/core/src/lib.rs | 28 ++++++---------------------- crates/core/tests/simple.rs | 3 +-- crates/core/tests/simple_multiple.rs | 3 +-- crates/parse/src/lib.rs | 14 +++++++------- crates/parse_lr1/src/lib.rs | 17 +++++------------ crates/parse_lr1/tests/simple.rs | 10 +++++----- 6 files changed, 25 insertions(+), 50 deletions(-) diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 1ffea75..c82bf29 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -38,10 +38,7 @@ pub struct Processor where G: GrammarDesign, Dl: LexDriver, - Dp: ParseDriver< - ::Tag, - ::Tag>>::Tag, - >, + Dp: ParseDriver, { // Cache cache_lex: Option>, @@ -66,10 +63,7 @@ impl Processor where G: GrammarDesign, Dl: LexDriver, - Dp: ParseDriver< - ::Tag, - ::Tag>>::Tag, - >, + Dp: ParseDriver, { pub fn new() -> Self { Processor { @@ -160,35 +154,25 @@ where self } - pub fn build_parser(self) -> Self + pub fn build_parser(self) -> anyhow::Result where G::Lex: Default, G::Parse: Default, - Dp: ParseDriver< - ::Tag, - ::Tag>>::Tag, - From = (G::Lex, G::Parse), - >, { self.build_parser_by((G::Lex::default(), G::Parse::default())) } - pub fn build_parser_by(mut self, source: (G::Lex, G::Parse)) -> Self + pub fn build_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result where G::Lex: Default, G::Parse: Default, - Dp: ParseDriver< - ::Tag, - ::Tag>>::Tag, - From = (G::Lex, G::Parse), - >, { assert!(self.cache_parse.is_none()); - let parser = Dp::from(source); + let parser = Dp::try_from(source)?; self.parser = Some(parser); - self + Ok(self) } pub fn build_parser_by_cache(mut self) -> Self diff --git a/crates/core/tests/simple.rs b/crates/core/tests/simple.rs index d449248..f79174b 100644 --- a/crates/core/tests/simple.rs +++ b/crates/core/tests/simple.rs @@ -60,8 +60,7 @@ type MyProcessor = Processor; fn simple_success() -> anyhow::Result<()> { MyProcessor::new() .build_lexer()? - .prebuild_parser()? - .build_parser_by_cache() + .build_parser()? 
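Patches 30 and 31 make both driver constructors fallible (`try_from` instead of `From`, since compiling a regex or building tables can fail), and the `Processor` builder passes that through: `build_lexer()` and `build_parser()` return `anyhow::Result<Self>`, so setup chains with `?` as in the test above. The fallible-builder shape in miniature (all names are stand-ins):

    struct Pipeline {
        lexer: Option<String>,
        parser: Option<String>,
    }

    impl Pipeline {
        fn new() -> Self {
            Pipeline { lexer: None, parser: None }
        }

        // Each step can fail (e.g. a bad regex), so it returns Result<Self>.
        fn build_lexer(mut self) -> anyhow::Result<Self> {
            self.lexer = Some("regex lexer".into());
            Ok(self)
        }

        fn build_parser(mut self) -> anyhow::Result<Self> {
            self.parser = Some("lr1 parser".into());
            Ok(self)
        }
    }

    fn main() -> anyhow::Result<()> {
        // Same chaining style as the tests.
        let p = Pipeline::new().build_lexer()?.build_parser()?;
        assert!(p.lexer.is_some() && p.parser.is_some());
        Ok(())
    }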
.process("1 + 2 * 3")?; Ok(()) diff --git a/crates/core/tests/simple_multiple.rs b/crates/core/tests/simple_multiple.rs index 01a0bfc..cc19180 100644 --- a/crates/core/tests/simple_multiple.rs +++ b/crates/core/tests/simple_multiple.rs @@ -113,7 +113,6 @@ fn gen_processor() -> MyProcessor { MyProcessor::new() .build_lexer() .unwrap() - .prebuild_parser() + .build_parser() .unwrap() - .build_parser_by_cache() } diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 467e812..d8d301a 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -1,5 +1,6 @@ use copager_cfg::token::{TokenTag, Token}; use copager_cfg::rule::{RuleTag, RuleSet}; +use copager_lex::LexSource; #[cfg(feature = "derive")] pub use copager_parse_derive::ParseSource; @@ -25,15 +26,14 @@ pub trait ParseSource { } } -pub trait ParseDriver +pub trait ParseDriver where - Self: From, - T: TokenTag, - R: RuleTag, + Self: Sized, + Sl: LexSource, + Sp: ParseSource, { - type From; - + fn try_from(source: (Sl, Sp)) -> anyhow::Result; fn run<'input, Il>(&self, lexer: Il) -> impl Iterator where - Il: Iterator>; + Il: Iterator>; } diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index 19f5837..5ac444b 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -38,26 +38,19 @@ where } fn restore(tables: Self::Cache) -> Self { - Self::from(tables) - } -} - -impl From> for LR1 -where - Sl: LexSource, - Sp: ParseSource, -{ - fn from(tables: LR1Configure) -> Self { LR1 { tables } } } -impl ParseDriver for LR1 +impl ParseDriver for LR1 where Sl: LexSource, Sp: ParseSource, { - type From = LR1Configure; + fn try_from((source_l, source_p): (Sl, Sp)) -> anyhow::Result { + let tables = LR1Configure::new(&source_l, &source_p)?; + Ok(LR1 { tables }) + } gen fn run<'input, Il>(&self, mut lexer: Il) where diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs index f1fbb3d..3b6aae7 100644 --- a/crates/parse_lr1/tests/simple.rs +++ b/crates/parse_lr1/tests/simple.rs @@ -6,7 +6,6 @@ use copager_lex::{LexSource, LexDriver}; use copager_lex_regex::RegexLexer; use copager_parse::{ParseSource, ParseDriver}; use copager_parse_lr1::LR1; -use copager_utils::cache::Cacheable; #[derive( Debug, Default, Copy, Clone, Hash, PartialEq, Eq, @@ -52,17 +51,18 @@ enum ExprRule { } type MyLexer = RegexLexer; +type MyParser = LR1; #[test] fn simple_success() -> anyhow::Result<()> { let source = ExprToken::default(); let lexer = >::try_from(source).unwrap(); - let parser_conf = LR1::new((ExprToken::default(), ExprRule::default()))?; - let parser = LR1::from(parser_conf); - let parser = parser.run(lexer.run("1 + 2 * 3")); + let source = (ExprToken::default(), ExprRule::default()); + let parser = >::try_from(source)?; - assert_eq!(parser.count(), 0); + let result = parser.run(lexer.run("1 + 2 * 3")); + assert_eq!(result.count(), 0); Ok(()) } From 2667c3c42e1fd13271f3fa7dee6ddcad17df8b8b Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 17:56:08 +0900 Subject: [PATCH 32/55] =?UTF-8?q?[add]=20parse::ParseState=20=E5=88=97?= =?UTF-8?q?=E6=8C=99=E5=9E=8B=E5=AE=9A=E7=BE=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/core/src/lib.rs | 12 ++++++++++-- crates/parse/src/lib.rs | 12 +++++++++++- crates/parse_lr1/src/lib.rs | 18 ++++++------------ 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index c82bf29..aae2bbd 100644 --- a/crates/core/src/lib.rs +++ 
b/crates/core/src/lib.rs @@ -7,7 +7,7 @@ use serde_cbor::ser::to_vec_packed; use serde_cbor::de::from_slice; use copager_lex::{LexSource, LexDriver}; -use copager_parse::{ParseSource, ParseDriver}; +use copager_parse::{ParseSource, ParseDriver, ParseState}; use copager_utils::cache::Cacheable; pub trait GrammarDesign { @@ -199,7 +199,15 @@ where let parser = self.parser.as_ref().unwrap(); for result in parser.run(lexer.run(input)) { - println!("{:?}", result); + match result { + ParseState::Consume(token) => { + println!("Consume: {:?}", token); + }, + ParseState::Reduce(rule) => { + println!("Reduce: {:?}", rule); + }, + ParseState::Err(err) => return Err(err), + } } Ok(()) diff --git a/crates/parse/src/lib.rs index d8d301a..082a9c1 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -33,7 +33,17 @@ where Sp: ParseSource<Sl::Tag>, { fn try_from(source: (Sl, Sp)) -> anyhow::Result<Self>; - fn run<'input, Il>(&self, lexer: Il) -> impl Iterator + fn run<'input, Il>(&self, lexer: Il) -> impl Iterator<Item = ParseState<'input, Sl::Tag, Sp::Tag>> where Il: Iterator<Item = Token<'input, Sl::Tag>>; } + +pub enum ParseState<'input, T, R> +where + T: TokenTag, + R: RuleTag<T>, +{ + Consume(Token<'input, T>), + Reduce(R), + Err(anyhow::Error) +} diff --git a/crates/parse_lr1/src/lib.rs index 5ac444b..fd4c543 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -9,7 +9,7 @@ use serde::{Serialize, Deserialize}; use copager_cfg::token::Token; use copager_lex::LexSource; -use copager_parse::{ParseSource, ParseDriver}; +use copager_parse::{ParseSource, ParseDriver, ParseState}; use copager_utils::cache::Cacheable; use builder::{LR1Configure, LRAction}; @@ -52,7 +52,7 @@ where Ok(LR1 { tables }) } - gen fn run<'input, Il>(&self, mut lexer: Il) + gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseState<'input, Sl::Tag, Sp::Tag> where Il: Iterator<Item = Token<'input, Sl::Tag>>, { @@ -71,29 +71,23 @@ match action { (LRAction::Shift(new_state), Some(token)) => { stack.push(*new_state); - // builder.push(token); - println!("Shift: {}", token.as_str()); + yield ParseState::Consume(token); break; } (LRAction::Reduce(tag, goto, elems_cnt), _) => { stack.truncate(stack.len() - elems_cnt); stack.push(self.tables.goto_table[stack[stack.len() - 1]][*goto]); - // builder.wrap(*tag, *elems_cnt); - println!("Reduce: {:?}", tag); + yield ParseState::Reduce(*tag); } (LRAction::Accept, _) => { - // return builder.build(); - println!("Done!"); return; } (LRAction::None, Some(token)) => { - // return Err(ParseError::new_unexpected_token(token).into()); - println!("Unexpected: {:?}", token); + yield ParseState::Err(ParseError::new_unexpected_token(token).into()); return; } (LRAction::None, None) => { - // return Err(ParseError::UnexpectedEOF.into()); - println!("Unexpected EOF"); + yield ParseState::Err(ParseError::UnexpectedEOF.into()); return; } _ => unreachable!(), From 12975a56e9a86fc6ee443812fd29e9700046fe3a Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 18:15:54 +0900 Subject: [PATCH 33/55] [update] Update the tests for the parse_lr1 crate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/parse_lr1/src/lib.rs | 82 -------------------------------- crates/parse_lr1/tests/simple.rs | 49 ++++++++++++++++--- 2 files changed, 43 insertions(+), 88 deletions(-) diff --git a/crates/parse_lr1/src/lib.rs index fd4c543..5c69b8c 100644
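Patch 32 is the pivot of the series: run() stops printing and instead lazily yields one ParseState per LR action, so error policy moves to the caller. Below is a stable-Rust sketch of that consumer-facing shape. It is an illustration only: the crate itself relies on the nightly `gen` feature, and the token and rule types here are placeholder strings, not copager items.

    use std::iter::from_fn;

    // Placeholder event type; copager's real ParseState is generic over its
    // TokenTag / RuleTag traits and carries a Token<'input, T>.
    #[allow(dead_code)]
    enum ParseState<T, R> {
        Consume(T),
        Reduce(R),
        Err(String),
    }

    // Yields Consume for each token, then a single Reduce, mimicking the
    // shape (not the logic) of the LR driver's event stream.
    fn run<'a>(tokens: &'a [&'a str]) -> impl Iterator<Item = ParseState<&'a str, &'static str>> {
        let mut idx = 0;
        let mut done = false;
        from_fn(move || {
            if idx < tokens.len() {
                idx += 1;
                Some(ParseState::Consume(tokens[idx - 1]))
            } else if !done {
                done = true;
                Some(ParseState::Reduce("expr"))
            } else {
                None
            }
        })
    }

    fn main() {
        for event in run(&["1", "+", "2"]) {
            match event {
                ParseState::Consume(token) => println!("Consume: {:?}", token),
                ParseState::Reduce(rule) => println!("Reduce: {:?}", rule),
                ParseState::Err(msg) => eprintln!("Err: {}", msg),
            }
        }
    }

Because Err is an ordinary item rather than a return value, a driver can report failure mid-stream without making run() itself fallible, which is exactly what Processor::process above relies on.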
--- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -96,85 +96,3 @@ where } } } - -// #[cfg(test)] -// mod test { -// use copager_core::cfg::{TokenSet, Syntax, Rule, RuleElem}; -// use copager_core::Parser; - -// use super::LR1; - -// #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, TokenSet)] -// enum TestTokenSet { -// #[token(regex = r"\+")] -// Plus, -// #[token(regex = r"-")] -// Minus, -// #[token(regex = r"\*")] -// Mul, -// #[token(regex = r"/")] -// Div, -// #[token(regex = r"\(")] -// BracketL, -// #[token(regex = r"\)")] -// BracketR, -// #[token(regex = r"[1-9][0-9]*")] -// Num, -// #[token(regex = r"[ \t\n]+", ignored)] -// _Whitespace, -// } - -// #[derive(Debug, Clone, Copy, Syntax)] -// enum TestSyntax { -// #[rule(" ::= Plus ")] -// #[rule(" ::= Minus ")] -// #[rule(" ::= ")] -// Expr, -// #[rule(" ::= Mul ")] -// #[rule(" ::= Div ")] -// #[rule(" ::= ")] -// Term, -// #[rule(" ::= BracketL BracketR")] -// #[rule(" ::= Num")] -// Num, -// } - -// #[test] -// fn input_ok() { -// let inputs = vec![ -// "10", -// "10 + 20", -// "10 - 20", -// "10 * 20", -// "10 / 20", -// "10 + 20 * 30 - 40", -// "(10)", -// "((((10))))", -// "10 * (20 - 30)", -// "((10 + 20) * (30 / 40)) - 50", -// ]; - -// let parser = Parser::>::new().unwrap(); -// for input in inputs { -// assert!(parser.parse(input).is_ok(), "{}", input); -// } -// } - -// #[test] -// fn input_err() { -// let inputs = vec![ -// "()", -// "(10 -", -// "10 +", -// "*", -// "10 20 + 30", -// "10 + 20 * 30 / 40 (", -// "(((10))", -// ]; - -// let parser = Parser::>::new().unwrap(); -// for input in inputs { -// assert!(parser.parse(input).is_err(), "{}", input); -// } -// } -// } diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs index 3b6aae7..d2f30c2 100644 --- a/crates/parse_lr1/tests/simple.rs +++ b/crates/parse_lr1/tests/simple.rs @@ -4,7 +4,7 @@ use copager_cfg::token::TokenTag; use copager_cfg::rule::{RuleTag, Rule, RuleElem}; use copager_lex::{LexSource, LexDriver}; use copager_lex_regex::RegexLexer; -use copager_parse::{ParseSource, ParseDriver}; +use copager_parse::{ParseSource, ParseDriver, ParseState}; use copager_parse_lr1::LR1; #[derive( @@ -53,16 +53,53 @@ enum ExprRule { type MyLexer = RegexLexer; type MyParser = LR1; +const OK_INPUTS: [&str; 10] = [ + "10", + "10 + 20", + "10 - 20", + "10 * 20", + "10 / 20", + "10 + 20 * 30 - 40", + "(10)", + "((((10))))", + "10 * (20 - 30)", + "((10 + 20) * (30 / 40)) - 50", +]; + +const ERR_INPUTS: [&str; 7] = [ + "()", + "(10 -", + "10 +", + "*", + "10 20 + 30", + "10 + 20 * 30 / 40 (", + "(((10))", +]; + #[test] -fn simple_success() -> anyhow::Result<()> { +fn simple_success() { + for input in &OK_INPUTS { + assert!(parse(input), "{}", input); + } +} + +#[test] +fn simple_failure() { + for input in &ERR_INPUTS { + assert!(!parse(input), "{}", input); + } +} + +fn parse<'input>(input: &'input str) -> bool { let source = ExprToken::default(); let lexer = >::try_from(source).unwrap(); let source = (ExprToken::default(), ExprRule::default()); - let parser = >::try_from(source)?; + let parser = >::try_from(source).unwrap(); - let result = parser.run(lexer.run("1 + 2 * 3")); - assert_eq!(result.count(), 0); + let mut parse_itr = parser.run(lexer.run(input)); + let is_err = |state| matches!(state, ParseState::Err(_)); + let err_happened = parse_itr.any(is_err); - Ok(()) + !err_happened } From c0ec3c7d52ebf3945a50280aace1f67ce6ade404 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 19:39:14 +0900 Subject: [PATCH 
34/55] =?UTF-8?q?[update]=20ParseState=20->=20ParseEvent?= =?UTF-8?q?=20&=20IRBuilder=20=E3=81=AB=E5=AF=BE=E5=BF=9C=E3=83=A1?= =?UTF-8?q?=E3=82=BD=E3=83=83=E3=83=89=E3=82=92=E7=94=9F=E3=82=84=E3=81=97?= =?UTF-8?q?=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 13 ++++++++ Cargo.toml | 5 ++- crates/cfg/src/rule.rs | 1 + crates/core/Cargo.toml | 2 ++ crates/core/src/lib.rs | 21 +++++++------ crates/core/tests/prebuild.rs | 3 +- crates/core/tests/simple.rs | 3 +- crates/core/tests/simple_multiple.rs | 9 +++--- crates/ir/src/lib.rs | 12 +++++--- crates/ir_sexp/src/lib.rs | 46 +++++++++++++--------------- crates/ir_void/Cargo.toml | 10 ++++++ crates/ir_void/src/lib.rs | 40 ++++++++++++++++++++++++ crates/parse/src/lib.rs | 8 ++--- crates/parse_derive/src/impl/rule.rs | 27 +++++++++++++--- crates/parse_lr1/src/lib.rs | 12 ++++---- crates/parse_lr1/tests/simple.rs | 4 +-- src/lib.rs | 2 ++ 17 files changed, 155 insertions(+), 63 deletions(-) create mode 100644 crates/ir_void/Cargo.toml create mode 100644 crates/ir_void/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 62b6862..acc0e13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,6 +27,7 @@ dependencies = [ "copager_core", "copager_ir", "copager_ir_sexp", + "copager_ir_void", "copager_lex", "copager_lex_regex", "copager_parse", @@ -50,6 +51,8 @@ dependencies = [ "anyhow", "copager_cfg", "copager_core", + "copager_ir", + "copager_ir_void", "copager_lex", "copager_lex_regex", "copager_parse", @@ -79,6 +82,16 @@ dependencies = [ "thiserror", ] +[[package]] +name = "copager_ir_void" +version = "0.1.1" +dependencies = [ + "anyhow", + "copager_cfg", + "copager_ir", + "thiserror", +] + [[package]] name = "copager_lex" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index 0e1b5e9..68ded0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ copager_lex_regex = { path = "./crates/lex_regex", optional = true } copager_parse = { path = "./crates/parse", optional = true } copager_parse_lr1 = { path = "./crates/parse_lr1", optional = true } copager_ir = { path = "./crates/ir" } +copager_ir_void = { path = "./crates/ir_void", optional = true } copager_ir_sexp = { path = "./crates/ir_sexp", optional = true } [dev-dependencies] @@ -25,7 +26,7 @@ copager = { path = ".", features = ["derive", "all"] } # common default = ["dep:copager_lex", "dep:copager_parse"] derive = ["copager_lex/derive", "copager_parse/derive"] -all = ["dep:copager_lex_regex", "dep:copager_parse_lr1", "dep:copager_ir_sexp"] +all = ["dep:copager_lex_regex", "dep:copager_parse_lr1", "dep:copager_ir_void", "dep:copager_ir_sexp"] # lex regexlex = ["dep:copager_lex_regex"] @@ -34,6 +35,7 @@ regexlex = ["dep:copager_lex_regex"] lr1 = ["dep:copager_parse_lr1"] # ir +void = ["dep:copager_ir_void"] sexp = ["dep:copager_ir_sexp"] [workspace] @@ -48,6 +50,7 @@ members = [ "./crates/parse_derive", "./crates/parse_lr1", "./crates/ir", + "./crates/ir_void", "./crates/ir_sexp", "./crates/utils", ] diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 2f1bd24..4d9d75b 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -8,6 +8,7 @@ pub trait RuleTag where Self: Debug + Copy + Clone + Hash + Eq, { + fn len(&self) -> usize; fn as_rules(&self) -> Vec>; } diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 284ceed..bf8c539 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -11,6 +11,7 @@ serde_cbor = "0.11.2" copager_cfg = { path = "../cfg" } copager_lex = { path = 
"../lex" } copager_parse = { path = "../parse" } +copager_ir = { path = "../ir" } copager_utils = { path = "../utils" } [dev-dependencies] @@ -19,3 +20,4 @@ copager_lex = { path = "../lex", features = ["derive"]} copager_lex_regex = { path = "../lex_regex" } copager_parse = { path = "../parse", features = ["derive"] } copager_parse_lr1 = { path = "../parse_lr1" } +copager_ir_void = { path = "../ir_void" } diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index aae2bbd..3912656 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -7,7 +7,8 @@ use serde_cbor::ser::to_vec_packed; use serde_cbor::de::from_slice; use copager_lex::{LexSource, LexDriver}; -use copager_parse::{ParseSource, ParseDriver, ParseState}; +use copager_parse::{ParseSource, ParseDriver, ParseEvent}; +use copager_ir::{IR, IRBuilder}; use copager_utils::cache::Cacheable; pub trait GrammarDesign { @@ -191,25 +192,25 @@ where self } - pub fn process<'input>(&self, input: &'input str) -> anyhow::Result<()> { + pub fn process<'input, I>(&self, input: &'input str) -> anyhow::Result + where + I: IR<'input, ::Tag, ::Tag>>::Tag>, + { assert!(self.lexer.is_some()); assert!(self.parser.is_some()); let lexer = self.lexer.as_ref().unwrap(); let parser = self.parser.as_ref().unwrap(); + let mut ir_builder = ::Tag, ::Tag>>::Tag>>::new(); for result in parser.run(lexer.run(input)) { match result { - ParseState::Consume(token) => { - println!("Consume: {:?}", token); - }, - ParseState::Reduce(rule) => { - println!("Reduce: {:?}", rule); - }, - ParseState::Err(err) => return Err(err), + ParseEvent::Read(token) => ir_builder.on_read(token)?, + ParseEvent::Parse(rule) => ir_builder.on_parse(rule)?, + ParseEvent::Err(err) => return Err(err), } } - Ok(()) + ir_builder.build() } } diff --git a/crates/core/tests/prebuild.rs b/crates/core/tests/prebuild.rs index 2a15fc1..af7db01 100644 --- a/crates/core/tests/prebuild.rs +++ b/crates/core/tests/prebuild.rs @@ -9,6 +9,7 @@ use copager_lex::LexSource; use copager_lex_regex::RegexLexer; use copager_parse::ParseSource; use copager_parse_lr1::LR1; +use copager_ir_void::Void; #[derive( Debug, Default, Copy, Clone, Hash, PartialEq, Eq, @@ -79,7 +80,7 @@ fn main_rs(processor: MyProcessor) -> anyhow::Result<()> { processor .build_lexer()? .build_parser_by_cache() - .process("1 + 2 * 3")?; + .process::("1 + 2 * 3")?; Ok(()) } diff --git a/crates/core/tests/simple.rs b/crates/core/tests/simple.rs index f79174b..4a62fd6 100644 --- a/crates/core/tests/simple.rs +++ b/crates/core/tests/simple.rs @@ -7,6 +7,7 @@ use copager_lex::LexSource; use copager_lex_regex::RegexLexer; use copager_parse::ParseSource; use copager_parse_lr1::LR1; +use copager_ir_void::Void; #[derive( Debug, Default, Copy, Clone, Hash, PartialEq, Eq, @@ -61,7 +62,7 @@ fn simple_success() -> anyhow::Result<()> { MyProcessor::new() .build_lexer()? .build_parser()? 
- .process("1 + 2 * 3")?; + .process::("1 + 2 * 3")?; Ok(()) } diff --git a/crates/core/tests/simple_multiple.rs b/crates/core/tests/simple_multiple.rs index cc19180..5e8ebc0 100644 --- a/crates/core/tests/simple_multiple.rs +++ b/crates/core/tests/simple_multiple.rs @@ -7,6 +7,7 @@ use copager_lex::LexSource; use copager_lex_regex::RegexLexer; use copager_parse::ParseSource; use copager_parse_lr1::LR1; +use copager_ir_void::Void; #[derive( Debug, Default, Copy, Clone, Hash, PartialEq, Eq, @@ -80,7 +81,7 @@ const ERR_INPUTS: [&str; 7] = [ fn simple_multiple_only_success() { let processor = gen_processor(); for input in OK_INPUTS { - assert!(processor.process(input).is_ok()); + assert!(processor.process::(input).is_ok()); } } @@ -88,7 +89,7 @@ fn simple_multiple_only_success() { fn simple_multiple_only_failure() { let processor = gen_processor(); for input in ERR_INPUTS { - assert!(processor.process(input).is_err()); + assert!(processor.process::(input).is_err()); } } @@ -102,9 +103,9 @@ fn simple_multiple_mix_success_and_failure() { let processor = gen_processor(); for (is_ok, input) in mixed_testcases { if is_ok { - assert!(processor.process(input).is_ok()); + assert!(processor.process::(input).is_ok()); } else { - assert!(processor.process(input).is_err()); + assert!(processor.process::(input).is_err()); } } } diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index 346e624..ac2505d 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -1,21 +1,23 @@ -use copager_cfg::token::TokenTag; +use copager_cfg::token::{TokenTag, Token}; use copager_cfg::rule::RuleTag; -pub trait IR +pub trait IR<'input, T, R> where T: TokenTag, R: RuleTag, { - type Builder: IRBuilder; + type Builder: IRBuilder<'input, T, R, Output = Self>; } -pub trait IRBuilder +pub trait IRBuilder<'input, T, R> where T: TokenTag, R: RuleTag, { - type Output: IR; + type Output: IR<'input, T, R>; fn new() -> Self; + fn on_read(&mut self, token: Token<'input, T>) -> anyhow::Result<()>; + fn on_parse(&mut self, rule: R) -> anyhow::Result<()>; fn build(self) -> anyhow::Result; } diff --git a/crates/ir_sexp/src/lib.rs b/crates/ir_sexp/src/lib.rs index 09395b5..26f4174 100644 --- a/crates/ir_sexp/src/lib.rs +++ b/crates/ir_sexp/src/lib.rs @@ -5,27 +5,27 @@ use copager_cfg::rule::RuleTag; use copager_ir::{IR, IRBuilder}; #[derive(Debug)] -pub enum SExp<'input, T, S> +pub enum SExp<'input, T, R> where T: TokenTag, - S: RuleTag, + R: RuleTag, { List { - tag: S, - elems: Vec>, + rule: R, + elems: Vec>, }, Atom(Token<'input, T>), } -impl Display for SExp<'_, T, S> +impl Display for SExp<'_, T, R> where T: TokenTag, - S: RuleTag + Debug, + R: RuleTag + Debug, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - SExp::List { tag, elems } => { - write!(f, "({:?}", tag)?; + SExp::List { rule, elems } => { + write!(f, "({:?}", rule)?; for elem in elems { write!(f, " {}", elem)?; } @@ -36,7 +36,7 @@ where } } -impl<'input, T, R> IR for SExp<'input, T, R> +impl<'input, T, R> IR<'input, T, R> for SExp<'input, T, R> where T: TokenTag, R: RuleTag, @@ -53,7 +53,7 @@ where stack: Vec>, } -impl <'input, T, R> IRBuilder for SExpBuilder<'input, T, R> +impl <'input, T, R> IRBuilder<'input, T, R> for SExpBuilder<'input, T, R> where T: TokenTag, R: RuleTag, @@ -64,6 +64,17 @@ where SExpBuilder { stack: vec![] } } + fn on_read(&mut self, token: Token<'input, T>) -> anyhow::Result<()> { + self.stack.push(SExp::Atom(token)); + Ok(()) + } + + fn on_parse(&mut self, rule: R) -> anyhow::Result<()> { + let elems = 
self.stack.split_off(self.stack.len() - rule.len()); + self.stack.push(SExp::List { rule, elems }); + Ok(()) + } + fn build(mut self) -> anyhow::Result> { if self.stack.len() == 1 { Ok(self.stack.pop().unwrap()) @@ -72,18 +83,3 @@ where } } } - -impl<'input, T, R> SExpBuilder<'input, T, R> -where - T: TokenTag, - R: RuleTag, -{ - pub fn push(&mut self, token: Token<'input, T>) { - self.stack.push(SExp::Atom(token)); - } - - pub fn wrap(&mut self, tag: R, cnt: usize) { - let elems = self.stack.split_off(self.stack.len() - cnt); - self.stack.push(SExp::List { tag, elems }); - } -} diff --git a/crates/ir_void/Cargo.toml b/crates/ir_void/Cargo.toml new file mode 100644 index 0000000..1659f14 --- /dev/null +++ b/crates/ir_void/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "copager_ir_void" +version = "0.1.1" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager_cfg = { path = "../cfg" } +copager_ir = { path = "../ir" } diff --git a/crates/ir_void/src/lib.rs b/crates/ir_void/src/lib.rs new file mode 100644 index 0000000..dcd8744 --- /dev/null +++ b/crates/ir_void/src/lib.rs @@ -0,0 +1,40 @@ +use std::fmt::Debug; + +use copager_cfg::token::{TokenTag, Token}; +use copager_cfg::rule::RuleTag; +use copager_ir::{IR, IRBuilder}; + +#[derive(Debug)] +pub struct Void; + +impl<'input, T, R> IR<'input, T, R> for Void +where + T: TokenTag, + R: RuleTag, +{ + type Builder = Self; +} + +impl <'input, T, R> IRBuilder<'input, T, R> for Void +where + T: TokenTag, + R: RuleTag, +{ + type Output = Self; + + fn new() -> Void { + Void + } + + fn on_read(&mut self, _: Token<'input, T>) -> anyhow::Result<()> { + Ok(()) + } + + fn on_parse(&mut self, _: R) -> anyhow::Result<()> { + Ok(()) + } + + fn build(self) -> anyhow::Result { + Ok(Void) + } +} diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 082a9c1..7996ee9 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -33,17 +33,17 @@ where Sp: ParseSource, { fn try_from(source: (Sl, Sp)) -> anyhow::Result; - fn run<'input, Il>(&self, lexer: Il) -> impl Iterator> + fn run<'input, Il>(&self, lexer: Il) -> impl Iterator> where Il: Iterator>; } -pub enum ParseState<'input, T, R> +pub enum ParseEvent<'input, T, R> where T: TokenTag, R: RuleTag, { - Consume(Token<'input, T>), - Reduce(R), + Read(Token<'input, T>), + Parse(R), Err(anyhow::Error) } diff --git a/crates/parse_derive/src/impl/rule.rs b/crates/parse_derive/src/impl/rule.rs index 08b722e..bb0790d 100644 --- a/crates/parse_derive/src/impl/rule.rs +++ b/crates/parse_derive/src/impl/rule.rs @@ -16,9 +16,12 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { .collect::>(); let enum_name = &ast.ident; - let enum_matcher_table = parsed_variantes + let enum_matcher_table_i2r = parsed_variantes .iter() - .map(|variant| variant.gen_ident_matcher()); + .map(|variant| variant.gen_matcher_ident_to_rule()); + let enum_matcher_table_i2l = parsed_variantes + .iter() + .map(|variant| variant.gen_matcher_ident_to_len()); let enum_assoc_type = format!("{}", enum_name) .replace("Rule", "Token") .parse::() @@ -29,9 +32,15 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { quote! { impl RuleTag<#enum_assoc_type> for #enum_name { + fn len(&self) -> usize { + match self { + #( #enum_matcher_table_i2l, )* + } + } + fn as_rules(&self) -> Vec> { match self { - #( #enum_matcher_table, )* + #( #enum_matcher_table_i2r, )* } } } @@ -82,7 +91,17 @@ impl<'a> VariantInfo<'a> { quote! 
{ #parent_ident :: #self_ident } } - fn gen_ident_matcher(&self) -> TokenStream { + fn gen_matcher_ident_to_len(&self) -> TokenStream { + let ident = self.gen_ident(); + if self.rules.is_empty() { + quote! { #ident => unimplemented!() } + } else { + let rules_len = &self.rules.len(); + quote! { #ident => #rules_len } + } + } + + fn gen_matcher_ident_to_rule(&self) -> TokenStream { let ident = self.gen_ident(); if self.rules.is_empty() { quote! { #ident => unimplemented!() } diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index 5c69b8c..8287436 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -9,7 +9,7 @@ use serde::{Serialize, Deserialize}; use copager_cfg::token::Token; use copager_lex::LexSource; -use copager_parse::{ParseSource, ParseDriver, ParseState}; +use copager_parse::{ParseSource, ParseDriver, ParseEvent}; use copager_utils::cache::Cacheable; use builder::{LR1Configure, LRAction}; @@ -52,7 +52,7 @@ where Ok(LR1 { tables }) } - gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseState<'input, Sl::Tag, Sp::Tag> + gen fn run<'input, Il>(&self, mut lexer: Il) -> ParseEvent<'input, Sl::Tag, Sp::Tag> where Il: Iterator>, { @@ -71,23 +71,23 @@ where match action { (LRAction::Shift(new_state), Some(token)) => { stack.push(*new_state); - yield ParseState::Consume(token); + yield ParseEvent::Read(token); break; } (LRAction::Reduce(tag, goto, elems_cnt), _) => { stack.truncate(stack.len() - elems_cnt); stack.push(self.tables.goto_table[stack[stack.len() - 1]][*goto]); - yield ParseState::Reduce(*tag); + yield ParseEvent::Parse(*tag); } (LRAction::Accept, _) => { return; } (LRAction::None, Some(token)) => { - yield ParseState::Err(ParseError::new_unexpected_token(token).into()); + yield ParseEvent::Err(ParseError::new_unexpected_token(token).into()); return; } (LRAction::None, None) => { - yield ParseState::Err(ParseError::UnexpectedEOF.into()); + yield ParseEvent::Err(ParseError::UnexpectedEOF.into()); return; } _ => unreachable!(), diff --git a/crates/parse_lr1/tests/simple.rs b/crates/parse_lr1/tests/simple.rs index d2f30c2..1acd706 100644 --- a/crates/parse_lr1/tests/simple.rs +++ b/crates/parse_lr1/tests/simple.rs @@ -4,7 +4,7 @@ use copager_cfg::token::TokenTag; use copager_cfg::rule::{RuleTag, Rule, RuleElem}; use copager_lex::{LexSource, LexDriver}; use copager_lex_regex::RegexLexer; -use copager_parse::{ParseSource, ParseDriver, ParseState}; +use copager_parse::{ParseSource, ParseDriver, ParseEvent}; use copager_parse_lr1::LR1; #[derive( @@ -98,7 +98,7 @@ fn parse<'input>(input: &'input str) -> bool { let parser = >::try_from(source).unwrap(); let mut parse_itr = parser.run(lexer.run(input)); - let is_err = |state| matches!(state, ParseState::Err(_)); + let is_err = |state| matches!(state, ParseEvent::Err(_)); let err_happened = parse_itr.any(is_err); !err_happened diff --git a/src/lib.rs b/src/lib.rs index bb5c5c7..d9e5f31 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,8 @@ pub mod parse { pub mod ir { pub use copager_ir::*; + #[cfg(any(feature = "all", feature = "void"))] + pub use copager_ir_void::*; #[cfg(any(feature = "all", feature = "sexp"))] pub use copager_ir_sexp::*; } From 0b6789dc461b89984b933fd6f7e8cb5f9085c56a Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 19:47:43 +0900 Subject: [PATCH 35/55] [change] IR -> IR --- Cargo.lock | 6 +++++ crates/core/src/lib.rs | 4 +-- crates/ir/Cargo.toml | 2 ++ crates/ir/src/lib.rs | 25 ++++++++--------- crates/ir_sexp/Cargo.toml | 2 ++ 
crates/ir_sexp/src/lib.rs | 57 +++++++++++++++++++++------------------ crates/ir_void/Cargo.toml | 2 ++ crates/ir_void/src/lib.rs | 21 ++++++++------- 8 files changed, 69 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index acc0e13..62993ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,6 +69,8 @@ version = "0.1.1" dependencies = [ "anyhow", "copager_cfg", + "copager_lex", + "copager_parse", "thiserror", ] @@ -79,6 +81,8 @@ dependencies = [ "anyhow", "copager_cfg", "copager_ir", + "copager_lex", + "copager_parse", "thiserror", ] @@ -89,6 +93,8 @@ dependencies = [ "anyhow", "copager_cfg", "copager_ir", + "copager_lex", + "copager_parse", "thiserror", ] diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 3912656..52295fc 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -194,7 +194,7 @@ where pub fn process<'input, I>(&self, input: &'input str) -> anyhow::Result where - I: IR<'input, ::Tag, ::Tag>>::Tag>, + I: IR<'input, G::Lex, G::Parse>, { assert!(self.lexer.is_some()); assert!(self.parser.is_some()); @@ -202,7 +202,7 @@ where let lexer = self.lexer.as_ref().unwrap(); let parser = self.parser.as_ref().unwrap(); - let mut ir_builder = ::Tag, ::Tag>>::Tag>>::new(); + let mut ir_builder = I::Builder::new(); for result in parser.run(lexer.run(input)) { match result { ParseEvent::Read(token) => ir_builder.on_read(token)?, diff --git a/crates/ir/Cargo.toml b/crates/ir/Cargo.toml index c9af348..ac192cb 100644 --- a/crates/ir/Cargo.toml +++ b/crates/ir/Cargo.toml @@ -7,3 +7,5 @@ edition = "2021" anyhow = { workspace = true } thiserror = { workspace = true } copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index ac2505d..bfd1aee 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -1,23 +1,24 @@ -use copager_cfg::token::{TokenTag, Token}; -use copager_cfg::rule::RuleTag; +use copager_cfg::token::Token; +use copager_lex::LexSource; +use copager_parse::ParseSource; -pub trait IR<'input, T, R> +pub trait IR<'input, Sl, Sp> where - T: TokenTag, - R: RuleTag, + Sl: LexSource, + Sp: ParseSource, { - type Builder: IRBuilder<'input, T, R, Output = Self>; + type Builder: IRBuilder<'input, Sl, Sp, Output = Self>; } -pub trait IRBuilder<'input, T, R> +pub trait IRBuilder<'input, Sl, Sp> where - T: TokenTag, - R: RuleTag, + Sl: LexSource, + Sp: ParseSource, { - type Output: IR<'input, T, R>; + type Output: IR<'input, Sl, Sp>; fn new() -> Self; - fn on_read(&mut self, token: Token<'input, T>) -> anyhow::Result<()>; - fn on_parse(&mut self, rule: R) -> anyhow::Result<()>; + fn on_read(&mut self, token: Token<'input, Sl::Tag>) -> anyhow::Result<()>; + fn on_parse(&mut self, rule: Sp::Tag) -> anyhow::Result<()>; fn build(self) -> anyhow::Result; } diff --git a/crates/ir_sexp/Cargo.toml b/crates/ir_sexp/Cargo.toml index de30731..9257f4c 100644 --- a/crates/ir_sexp/Cargo.toml +++ b/crates/ir_sexp/Cargo.toml @@ -7,4 +7,6 @@ edition = "2021" anyhow = { workspace = true } thiserror = { workspace = true } copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } copager_ir = { path = "../ir" } diff --git a/crates/ir_sexp/src/lib.rs b/crates/ir_sexp/src/lib.rs index 26f4174..5ab72c4 100644 --- a/crates/ir_sexp/src/lib.rs +++ b/crates/ir_sexp/src/lib.rs @@ -1,26 +1,30 @@ use std::fmt::{Debug, Display}; -use copager_cfg::token::{TokenTag, Token}; +use copager_cfg::token::Token; use 
copager_cfg::rule::RuleTag; +use copager_lex::LexSource; +use copager_parse::ParseSource; use copager_ir::{IR, IRBuilder}; #[derive(Debug)] -pub enum SExp<'input, T, R> +pub enum SExp<'input, Sl, Sp> where - T: TokenTag, - R: RuleTag, + Sl: LexSource, + Sp: ParseSource, { List { - rule: R, - elems: Vec>, + rule: Sp::Tag, + elems: Vec>, }, - Atom(Token<'input, T>), + Atom(Token<'input, Sl::Tag>), } -impl Display for SExp<'_, T, R> +impl Display for SExp<'_, Sl, Sp> where - T: TokenTag, - R: RuleTag + Debug, + Sl: LexSource, + Sp: ParseSource, + Sp::Tag: Debug, + Sl::Tag: Debug, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -36,46 +40,47 @@ where } } -impl<'input, T, R> IR<'input, T, R> for SExp<'input, T, R> +impl<'input, Sl, Sp> IR<'input, Sl, Sp> for SExp<'input, Sl, Sp> where - T: TokenTag, - R: RuleTag, + Sl: LexSource, + Sp: ParseSource, { - type Builder = SExpBuilder<'input, T, R>; + type Builder = SExpBuilder<'input, Sl, Sp>; } #[derive(Debug)] -pub struct SExpBuilder<'input, T, R> +pub struct SExpBuilder<'input, Sl, Sp> where - T: TokenTag, - R: RuleTag, + Sl: LexSource, + Sp: ParseSource, { - stack: Vec>, + stack: Vec>, } -impl <'input, T, R> IRBuilder<'input, T, R> for SExpBuilder<'input, T, R> + +impl <'input, Sl, Sp> IRBuilder<'input, Sl, Sp> for SExpBuilder<'input, Sl, Sp> where - T: TokenTag, - R: RuleTag, + Sl: LexSource, + Sp: ParseSource, { - type Output = SExp<'input, T, R>; + type Output = SExp<'input, Sl, Sp>; - fn new() -> SExpBuilder<'input, T, R> { + fn new() -> SExpBuilder<'input, Sl, Sp> { SExpBuilder { stack: vec![] } } - fn on_read(&mut self, token: Token<'input, T>) -> anyhow::Result<()> { + fn on_read(&mut self, token: Token<'input, Sl::Tag>) -> anyhow::Result<()> { self.stack.push(SExp::Atom(token)); Ok(()) } - fn on_parse(&mut self, rule: R) -> anyhow::Result<()> { + fn on_parse(&mut self, rule: Sp::Tag) -> anyhow::Result<()> { let elems = self.stack.split_off(self.stack.len() - rule.len()); self.stack.push(SExp::List { rule, elems }); Ok(()) } - fn build(mut self) -> anyhow::Result> { + fn build(mut self) -> anyhow::Result> { if self.stack.len() == 1 { Ok(self.stack.pop().unwrap()) } else { diff --git a/crates/ir_void/Cargo.toml b/crates/ir_void/Cargo.toml index 1659f14..66bb712 100644 --- a/crates/ir_void/Cargo.toml +++ b/crates/ir_void/Cargo.toml @@ -7,4 +7,6 @@ edition = "2021" anyhow = { workspace = true } thiserror = { workspace = true } copager_cfg = { path = "../cfg" } +copager_lex = { path = "../lex" } +copager_parse = { path = "../parse" } copager_ir = { path = "../ir" } diff --git a/crates/ir_void/src/lib.rs b/crates/ir_void/src/lib.rs index dcd8744..f6f7113 100644 --- a/crates/ir_void/src/lib.rs +++ b/crates/ir_void/src/lib.rs @@ -1,24 +1,25 @@ use std::fmt::Debug; -use copager_cfg::token::{TokenTag, Token}; -use copager_cfg::rule::RuleTag; +use copager_cfg::token::Token; +use copager_lex::LexSource; +use copager_parse::ParseSource; use copager_ir::{IR, IRBuilder}; #[derive(Debug)] pub struct Void; -impl<'input, T, R> IR<'input, T, R> for Void +impl<'input, Sl, Sp> IR<'input, Sl, Sp> for Void where - T: TokenTag, - R: RuleTag, + Sl: LexSource, + Sp: ParseSource, { type Builder = Self; } -impl <'input, T, R> IRBuilder<'input, T, R> for Void +impl <'input, Sl, Sp> IRBuilder<'input, Sl, Sp> for Void where - T: TokenTag, - R: RuleTag, + Sl: LexSource, + Sp: ParseSource, { type Output = Self; @@ -26,11 +27,11 @@ where Void } - fn on_read(&mut self, _: Token<'input, T>) -> anyhow::Result<()> { + fn on_read(&mut 
self, _: Token<'input, Sl::Tag>) -> anyhow::Result<()> { Ok(()) } - fn on_parse(&mut self, _: R) -> anyhow::Result<()> { + fn on_parse(&mut self, _: Sp::Tag) -> anyhow::Result<()> { Ok(()) } From 49527fa22ca98e929cba89322eb1dc45a0fb492f Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 19:57:59 +0900 Subject: [PATCH 36/55] =?UTF-8?q?[remove]=20=E3=83=AB=E3=83=BC=E3=83=88?= =?UTF-8?q?=E3=81=AB=E3=81=82=E3=82=8B=20tests=20=E3=82=92=E5=89=8A?= =?UTF-8?q?=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/derive.rs | 41 -------------------------------------- tests/serde.rs | 53 ------------------------------------------------- 2 files changed, 94 deletions(-) delete mode 100644 tests/derive.rs delete mode 100644 tests/serde.rs diff --git a/tests/derive.rs b/tests/derive.rs deleted file mode 100644 index 07d512b..0000000 --- a/tests/derive.rs +++ /dev/null @@ -1,41 +0,0 @@ -use copager::lex::LexSource; -use copager::parse::ParseSource; -use copager::prelude::*; - -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)] -enum ExprToken { - #[token(text = r"\+")] - Plus, - #[token(text = r"-")] - Minus, - #[token(text = r"\*")] - Mul, - #[token(text = r"/")] - Div, - #[token(text = r"\(")] - BracketL, - #[token(text = r"\)")] - BracketR, - #[token(text = r"[1-9][0-9]*")] - Num, - #[token(text = r"[ \t\n]+", ignored)] - _Whitespace, -} - -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] -enum ExprRule { - #[rule(" ::= Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, -} - -#[test] -fn check_compile() {} diff --git a/tests/serde.rs b/tests/serde.rs deleted file mode 100644 index a928b5b..0000000 --- a/tests/serde.rs +++ /dev/null @@ -1,53 +0,0 @@ -// use serde::{Serialize, Deserialize}; - -// use copager::algorithm::LR1; -// use copager::cfg::*; -// use copager::Parser; - -// #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, TokenSet)] -// enum TestTokenSet { -// #[token(regex = r"\+")] -// Plus, -// #[token(regex = r"-")] -// Minus, -// #[token(regex = r"\*")] -// Mul, -// #[token(regex = r"/")] -// Div, -// #[token(regex = r"\(")] -// BracketL, -// #[token(regex = r"\)")] -// BracketR, -// #[token(regex = r"[1-9][0-9]*")] -// Num, -// #[token(regex = r"[ \t\n]+", ignored)] -// _Whitespace, -// } - -// #[derive(Debug, Clone, Copy, Serialize, Deserialize, Syntax)] -// enum TestSyntax { -// #[rule(" ::= Plus ")] -// #[rule(" ::= Minus ")] -// #[rule(" ::= ")] -// Expr, -// #[rule(" ::= Mul ")] -// #[rule(" ::= Div ")] -// #[rule(" ::= ")] -// Term, -// #[rule(" ::= BracketL BracketR")] -// #[rule(" ::= Num")] -// Num, -// } - -// type TestParser<'a> = Parser::<'a, LR1<'a, TestTokenSet, TestSyntax>>; - -// #[test] -// fn check_serde() { -// // build.rs -// let parser = TestParser::new().unwrap(); -// let serialized = serde_json::to_string(&parser).unwrap(); - -// // main.rs -// let deserialized: TestParser = serde_json::from_str(&serialized).unwrap(); -// deserialized.parse("10 * (20 - 30)").unwrap(); -// } From 0755f680173dfccb26cbe70ffcd7d04d34dcaff9 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 20:03:15 +0900 Subject: [PATCH 37/55] =?UTF-8?q?[clean]=20all=20=E3=83=95=E3=83=A9?= =?UTF-8?q?=E3=82=B0=E5=AE=9A=E7=BE=A9=E3=81=AE=E8=A8=98=E8=BF=B0=E6=96=B9?= =?UTF-8?q?=E6=B3=95=E3=82=92=E5=A4=89=E6=9B=B4?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 68ded0d..6d2be06 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ copager = { path = ".", features = ["derive", "all"] } # common default = ["dep:copager_lex", "dep:copager_parse"] derive = ["copager_lex/derive", "copager_parse/derive"] -all = ["dep:copager_lex_regex", "dep:copager_parse_lr1", "dep:copager_ir_void", "dep:copager_ir_sexp"] +all = ["regexlex", "lr1", "void", "sexp"] # lex regexlex = ["dep:copager_lex_regex"] From 44ff50c7629edd09c6ed849096f21e13399a0635 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 20:10:20 +0900 Subject: [PATCH 38/55] =?UTF-8?q?[clean]=20regex,=20regex-macro=20?= =?UTF-8?q?=E3=82=92=20workspace.dependencies=20=E3=81=8B=E3=82=89?= =?UTF-8?q?=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6d2be06..78bbf14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,5 +60,3 @@ exclude = [] anyhow = "1.0.82" thiserror = "1.0.58" serde = { version = "1.0.197", features = ["derive"] } -regex = "1.10.4" -regex-macro = "0.2.0" From b11b72079d1e4409dda3ff7fddc161fee6a8a42d Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 20:31:55 +0900 Subject: [PATCH 39/55] =?UTF-8?q?[fix]=20ParseTag::len=20=E5=89=8A?= =?UTF-8?q?=E9=99=A4=20&=20=20ParseEvent::Parse=20=E3=81=AB=20len=20?= =?UTF-8?q?=E8=A6=81=E7=B4=A0=E3=82=92=E6=8C=81=E3=81=9F=E3=81=9B=E3=82=8B?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 3 +++ crates/cfg/src/rule.rs | 1 - crates/core/src/lib.rs | 2 +- crates/ir/src/lib.rs | 2 +- crates/ir_sexp/Cargo.toml | 7 +++++++ crates/ir_sexp/src/lib.rs | 5 ++--- crates/ir_void/src/lib.rs | 2 +- crates/parse/src/lib.rs | 10 ++++++++-- crates/parse_derive/src/impl/rule.rs | 19 ------------------- crates/parse_lr1/src/lib.rs | 2 +- 10 files changed, 24 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62993ba..b184cf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -81,8 +81,11 @@ dependencies = [ "anyhow", "copager_cfg", "copager_ir", + "copager_ir_sexp", "copager_lex", + "copager_lex_regex", "copager_parse", + "copager_parse_lr1", "thiserror", ] diff --git a/crates/cfg/src/rule.rs b/crates/cfg/src/rule.rs index 4d9d75b..2f1bd24 100644 --- a/crates/cfg/src/rule.rs +++ b/crates/cfg/src/rule.rs @@ -8,7 +8,6 @@ pub trait RuleTag where Self: Debug + Copy + Clone + Hash + Eq, { - fn len(&self) -> usize; fn as_rules(&self) -> Vec>; } diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 52295fc..ecf9e59 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -206,7 +206,7 @@ where for result in parser.run(lexer.run(input)) { match result { ParseEvent::Read(token) => ir_builder.on_read(token)?, - ParseEvent::Parse(rule) => ir_builder.on_parse(rule)?, + ParseEvent::Parse{ rule,len } => ir_builder.on_parse(rule, len)?, ParseEvent::Err(err) => return Err(err), } } diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs index bfd1aee..a45b3a5 100644 --- a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -19,6 +19,6 @@ where fn new() -> Self; fn on_read(&mut self, token: Token<'input, Sl::Tag>) -> anyhow::Result<()>; - fn on_parse(&mut 
self, rule: Sp::Tag) -> anyhow::Result<()>; + fn on_parse(&mut self, rule: Sp::Tag, len: usize) -> anyhow::Result<()>; fn build(self) -> anyhow::Result; } diff --git a/crates/ir_sexp/Cargo.toml b/crates/ir_sexp/Cargo.toml index 9257f4c..fc26a85 100644 --- a/crates/ir_sexp/Cargo.toml +++ b/crates/ir_sexp/Cargo.toml @@ -10,3 +10,10 @@ copager_cfg = { path = "../cfg" } copager_lex = { path = "../lex" } copager_parse = { path = "../parse" } copager_ir = { path = "../ir" } + +[dev-dependencies] +copager_lex = { path = "../lex", features = ["derive"] } +copager_lex_regex = { path = "../lex_regex" } +copager_parse = { path = "../parse", features = ["derive"] } +copager_parse_lr1 = { path = "../parse_lr1" } +copager_ir_sexp = { path = "." } diff --git a/crates/ir_sexp/src/lib.rs b/crates/ir_sexp/src/lib.rs index 5ab72c4..286a953 100644 --- a/crates/ir_sexp/src/lib.rs +++ b/crates/ir_sexp/src/lib.rs @@ -1,7 +1,6 @@ use std::fmt::{Debug, Display}; use copager_cfg::token::Token; -use copager_cfg::rule::RuleTag; use copager_lex::LexSource; use copager_parse::ParseSource; use copager_ir::{IR, IRBuilder}; @@ -74,8 +73,8 @@ where Ok(()) } - fn on_parse(&mut self, rule: Sp::Tag) -> anyhow::Result<()> { - let elems = self.stack.split_off(self.stack.len() - rule.len()); + fn on_parse(&mut self, rule: Sp::Tag, len: usize) -> anyhow::Result<()> { + let elems = self.stack.split_off(self.stack.len() - len); self.stack.push(SExp::List { rule, elems }); Ok(()) } diff --git a/crates/ir_void/src/lib.rs b/crates/ir_void/src/lib.rs index f6f7113..e776042 100644 --- a/crates/ir_void/src/lib.rs +++ b/crates/ir_void/src/lib.rs @@ -31,7 +31,7 @@ where Ok(()) } - fn on_parse(&mut self, _: Sp::Tag) -> anyhow::Result<()> { + fn on_parse(&mut self, _: Sp::Tag, _: usize) -> anyhow::Result<()> { Ok(()) } diff --git a/crates/parse/src/lib.rs b/crates/parse/src/lib.rs index 7996ee9..760f467 100644 --- a/crates/parse/src/lib.rs +++ b/crates/parse/src/lib.rs @@ -43,7 +43,13 @@ where T: TokenTag, R: RuleTag, { + // Parsing Event Read(Token<'input, T>), - Parse(R), - Err(anyhow::Error) + Parse { + rule: R, + len: usize, + }, + + // Control + Err(anyhow::Error), } diff --git a/crates/parse_derive/src/impl/rule.rs b/crates/parse_derive/src/impl/rule.rs index bb0790d..5031024 100644 --- a/crates/parse_derive/src/impl/rule.rs +++ b/crates/parse_derive/src/impl/rule.rs @@ -19,9 +19,6 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { let enum_matcher_table_i2r = parsed_variantes .iter() .map(|variant| variant.gen_matcher_ident_to_rule()); - let enum_matcher_table_i2l = parsed_variantes - .iter() - .map(|variant| variant.gen_matcher_ident_to_len()); let enum_assoc_type = format!("{}", enum_name) .replace("Rule", "Token") .parse::() @@ -32,12 +29,6 @@ pub fn proc_macro_impl(ast: DeriveInput) -> TokenStream { quote! { impl RuleTag<#enum_assoc_type> for #enum_name { - fn len(&self) -> usize { - match self { - #( #enum_matcher_table_i2l, )* - } - } - fn as_rules(&self) -> Vec> { match self { #( #enum_matcher_table_i2r, )* @@ -91,16 +82,6 @@ impl<'a> VariantInfo<'a> { quote! { #parent_ident :: #self_ident } } - fn gen_matcher_ident_to_len(&self) -> TokenStream { - let ident = self.gen_ident(); - if self.rules.is_empty() { - quote! { #ident => unimplemented!() } - } else { - let rules_len = &self.rules.len(); - quote! 
{ #ident => #rules_len } - } - } - fn gen_matcher_ident_to_rule(&self) -> TokenStream { let ident = self.gen_ident(); if self.rules.is_empty() { diff --git a/crates/parse_lr1/src/lib.rs b/crates/parse_lr1/src/lib.rs index 8287436..0e1a754 100644 --- a/crates/parse_lr1/src/lib.rs +++ b/crates/parse_lr1/src/lib.rs @@ -77,7 +77,7 @@ where (LRAction::Reduce(tag, goto, elems_cnt), _) => { stack.truncate(stack.len() - elems_cnt); stack.push(self.tables.goto_table[stack[stack.len() - 1]][*goto]); - yield ParseEvent::Parse(*tag); + yield ParseEvent::Parse { rule: *tag, len: *elems_cnt }; } (LRAction::Accept, _) => { return; From 4d1265ee137b771d4a0546602496c43f94e12d5b Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 20:53:56 +0900 Subject: [PATCH 40/55] =?UTF-8?q?[add]=20ir=5Fsexp=20=E3=81=AE=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/ir_sexp/tests/simple.rs | 138 +++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 crates/ir_sexp/tests/simple.rs diff --git a/crates/ir_sexp/tests/simple.rs b/crates/ir_sexp/tests/simple.rs new file mode 100644 index 0000000..0f42f78 --- /dev/null +++ b/crates/ir_sexp/tests/simple.rs @@ -0,0 +1,138 @@ +use copager_cfg::token::TokenTag; +use copager_cfg::rule::{RuleTag, Rule, RuleElem}; +use copager_lex::{LexSource, LexDriver}; +use copager_lex_regex::RegexLexer; +use copager_parse::{ParseSource, ParseDriver, ParseEvent}; +use copager_parse_lr1::LR1; +use copager_ir::{IR, IRBuilder}; +use copager_ir_sexp::SExp; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyIR = SExp<'static, ExprToken, ExprRule>; + +#[test] +fn simple_display() { + let ir = parse("1"); + assert!(ir.is_ok()); + assert_eq!(ir.unwrap().to_string(), r#"(Expr (Term (Num "1")))"#); + + let ir = parse("1 + 1"); + assert!(ir.is_ok()); + assert_eq!(ir.unwrap().to_string(), r#"(Expr (Expr (Term (Num "1"))) "+" (Term (Num "1")))"#); +} + +#[test] +fn simple_eval() { + assert_eq!(eval(&parse("1").unwrap()), 1); + assert_eq!(eval(&parse("1 + 2").unwrap()), 3); + assert_eq!(eval(&parse("1 + 2 * 3").unwrap()), 7); + assert_eq!(eval(&parse("(1 + 2) * 3").unwrap()), 9); +} + +fn parse<'input>(input: &'input str) -> anyhow::Result> { + let source = ExprToken::default(); + let lexer = >::try_from(source).unwrap(); + + let source = (ExprToken::default(), ExprRule::default()); + let parser = >::try_from(source).unwrap(); + + let mut ir_builder = >::Builder::new(); + for event in parser.run(lexer.run(input)) { + match event { + ParseEvent::Read(token) => { + ir_builder.on_read(token).unwrap(); + } + ParseEvent::Parse { rule, len } => { + ir_builder.on_parse(rule, len).unwrap(); + } + ParseEvent::Err(err) => { + 
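+                // Err carries the driver's anyhow::Error; rewrapping it via
+                // anyhow! below keeps only its Debug text, enough for a test.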
return Err(anyhow::anyhow!("{:?}", err)); + } + } + } + + ir_builder.build() +} + +fn eval(ir: &SExp<'static, ExprToken, ExprRule>) -> i32 { + macro_rules! match_atom { + ($term:expr, $($kind:pat => $block:expr),* $(,)?) => { + match $term { + SExp::Atom(token) => { + match token.kind { + $($kind => $block,)* + _ => unreachable!(), + } + } + _ => unreachable!(), + } + } + } + + match ir { + SExp::List { rule, elems } => { + match rule { + ExprRule::Expr if elems.len() == 1 => eval(&elems[0]), + ExprRule::Expr => { + let lhs = eval(&elems[0]); + let rhs = eval(&elems[2]); + match_atom!(elems[1], + ExprToken::Plus => lhs + rhs, + ExprToken::Minus => lhs - rhs, + ) + } + ExprRule::Term if elems.len() == 1 => eval(&elems[0]), + ExprRule::Term => { + let lhs = eval(&elems[0]); + let rhs = eval(&elems[2]); + match_atom!(elems[1], + ExprToken::Mul => lhs * rhs, + ExprToken::Div => lhs / rhs, + ) + } + ExprRule::Num if elems.len() == 1 => eval(&elems[0]), + ExprRule::Num => eval(&elems[1]), + + } + } + SExp::Atom(token) => token.as_str().parse().unwrap(), + } +} From c9ba112a363935456e782afa3781988a1587e4fa Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 23:11:04 +0900 Subject: [PATCH 41/55] [add] examples/oneshot --- Cargo.lock | 9 +++ Cargo.toml | 4 ++ crates/ir_sexp/src/lib.rs | 2 +- examples/expr.rs | 42 -------------- examples/oneshot/Cargo.toml | 9 +++ examples/oneshot/src/main.rs | 107 +++++++++++++++++++++++++++++++++++ 6 files changed, 130 insertions(+), 43 deletions(-) delete mode 100644 examples/expr.rs create mode 100644 examples/oneshot/Cargo.toml create mode 100644 examples/oneshot/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index b184cf3..3d26de8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,6 +197,15 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "example_oneshot" +version = "0.1.0" +dependencies = [ + "anyhow", + "copager", + "thiserror", +] + [[package]] name = "half" version = "1.8.3" diff --git a/Cargo.toml b/Cargo.toml index 78bbf14..adc9653 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ sexp = ["dep:copager_ir_sexp"] [workspace] resolver = "2" members = [ + # Copager "./crates/core", "./crates/cfg", "./crates/lex", @@ -53,6 +54,9 @@ members = [ "./crates/ir_void", "./crates/ir_sexp", "./crates/utils", + + # Examples + "./examples/oneshot", ] exclude = [] diff --git a/crates/ir_sexp/src/lib.rs b/crates/ir_sexp/src/lib.rs index 286a953..a24f3c6 100644 --- a/crates/ir_sexp/src/lib.rs +++ b/crates/ir_sexp/src/lib.rs @@ -79,7 +79,7 @@ where Ok(()) } - fn build(mut self) -> anyhow::Result> { + fn build(mut self) -> anyhow::Result { if self.stack.len() == 1 { Ok(self.stack.pop().unwrap()) } else { diff --git a/examples/expr.rs b/examples/expr.rs deleted file mode 100644 index 5b4a6f7..0000000 --- a/examples/expr.rs +++ /dev/null @@ -1,42 +0,0 @@ -use copager::lex::LexSource; -use copager::parse::ParseSource; -use copager::prelude::*; - -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, LexSource)] -enum ExprToken { - #[token(text = r"\+")] - Plus, - #[token(text = r"-")] - Minus, - #[token(text = r"\*")] - Mul, - #[token(text = r"/")] - Div, - #[token(text = r"\(")] - BracketL, - #[token(text = r"\)")] - BracketR, - #[token(text = r"[1-9][0-9]*")] - Num, - #[token(text = r"[ \t\n]+", ignored)] - _Whitespace, -} - -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] -enum ExprRule { - #[rule(" ::= 
Plus ")] - #[rule(" ::= Minus ")] - #[rule(" ::= ")] - Expr, - #[rule(" ::= Mul ")] - #[rule(" ::= Div ")] - #[rule(" ::= ")] - Term, - #[rule(" ::= BracketL BracketR")] - #[rule(" ::= Num")] - Num, -} - -fn main() -> anyhow::Result<()> { - Ok(()) -} diff --git a/examples/oneshot/Cargo.toml b/examples/oneshot/Cargo.toml new file mode 100644 index 0000000..b38a4b8 --- /dev/null +++ b/examples/oneshot/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "example_oneshot" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { workspace = true } +thiserror = { workspace = true } +copager = { path = "../..", features = ["derive", "regexlex", "lr1", "sexp"] } diff --git a/examples/oneshot/src/main.rs b/examples/oneshot/src/main.rs new file mode 100644 index 0000000..4bbb9bb --- /dev/null +++ b/examples/oneshot/src/main.rs @@ -0,0 +1,107 @@ +use std::io::stdin; + +use copager::lex::{LexSource, RegexLexer}; +use copager::parse::{ParseSource, LR1}; +use copager::ir::SExp; +use copager::prelude::*; +use copager::{Grammar, Processor}; + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, LexSource)] +enum ExprToken { + #[default] + #[token(text = r"\+")] + Plus, + #[token(text = r"-")] + Minus, + #[token(text = r"\*")] + Mul, + #[token(text = r"/")] + Div, + #[token(text = r"\(")] + BracketL, + #[token(text = r"\)")] + BracketR, + #[token(text = r"[1-9][0-9]*")] + Num, + #[token(text = r"[ \t\n]+", ignored)] + _Whitespace, +} + +#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq, ParseSource)] +enum ExprRule { + #[default] + #[rule(" ::= Plus ")] + #[rule(" ::= Minus ")] + #[rule(" ::= ")] + Expr, + #[rule(" ::= Mul ")] + #[rule(" ::= Div ")] + #[rule(" ::= ")] + Term, + #[rule(" ::= BracketL BracketR")] + #[rule(" ::= Num")] + Num, +} + +type MyGrammar = Grammar; +type MyLexer = RegexLexer; +type MyParser = LR1; +type MyProcessor = Processor; + +fn eval(ir: &SExp<'_, ExprToken, ExprRule>) -> i32 { + macro_rules! match_atom { + ($term:expr, $($kind:pat => $block:expr),* $(,)?) => { + match $term { + SExp::Atom(token) => { + match token.kind { + $($kind => $block,)* + _ => unreachable!(), + } + } + _ => unreachable!(), + } + } + } + + match ir { + SExp::List { rule, elems } => { + match rule { + ExprRule::Expr if elems.len() == 1 => eval(&elems[0]), + ExprRule::Expr => { + let lhs = eval(&elems[0]); + let rhs = eval(&elems[2]); + match_atom!(elems[1], + ExprToken::Plus => lhs + rhs, + ExprToken::Minus => lhs - rhs, + ) + } + ExprRule::Term if elems.len() == 1 => eval(&elems[0]), + ExprRule::Term => { + let lhs = eval(&elems[0]); + let rhs = eval(&elems[2]); + match_atom!(elems[1], + ExprToken::Mul => lhs * rhs, + ExprToken::Div => lhs / rhs, + ) + } + ExprRule::Num if elems.len() == 1 => eval(&elems[0]), + ExprRule::Num => eval(&elems[1]), + + } + } + SExp::Atom(token) => token.as_str().parse().unwrap(), + } +} + +fn main() -> anyhow::Result<()> { + let mut input = String::new(); + stdin().read_line(&mut input)?; + + let sexp = MyProcessor::new() + .build_lexer()? + .build_parser()? 
+ .process::>(&input)?; + println!("{} = {}", input.trim(), eval(&sexp)); + + Ok(()) +} From 1b8d3377cb9b2c1144a553376f58dbf922b17e8a Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 23:28:02 +0900 Subject: [PATCH 42/55] =?UTF-8?q?[update]=20ParseError::display=20?= =?UTF-8?q?=E3=81=A7=20pretty=5Fprint=20=E3=82=92=E8=A1=8C=E3=81=86?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/core/src/error.rs | 59 ++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index 4c5f915..0c63504 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -15,7 +15,36 @@ pub struct ParseError { impl Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.err) + fn pretty_print( + f: &mut std::fmt::Formatter<'_>, + input: &str, + pos: (usize, usize) + ) -> std::fmt::Result { + writeln!(f, "-----")?; + + let (row, col) = (pos.0 as i32 - 1, pos.1 as i32 - 1); + let lines = input.split('\n'); + let neighbor_lines = lines + .skip(max(0, row - 2) as usize) + .take(min(row + 1, 3) as usize); + + for (idx, line) in neighbor_lines.enumerate() { + let row = max(1, row - 1) + (idx as i32); + writeln!(f, "{:2}: {}", row, line)?; + } + + writeln!(f, " {}^ here", " ".repeat(col as usize))?; + writeln!(f, "Found at line {}, column {}.", row + 1, col + 1)?; + writeln!(f, "-----") + } + + writeln!(f, "{}", self.err)?; + match (&self.src, self.pos) { + (Some(src), Some(pos)) => pretty_print(f, &src, pos)?, + _ => {}, + } + + Ok(()) } } @@ -57,32 +86,4 @@ impl ParseError { pos: Some((rows, cols)), } } - - pub fn pretty_print(&self) { - let pretty_printer = |input: &str, pos: (usize, usize)| { - eprintln!("-----"); - - let (row, col) = (pos.0 as i32 - 1, pos.1 as i32 - 1); - let lines = input.split('\n'); - let neighbor_lines = lines - .skip(max(0, row - 2) as usize) - .take(min(row + 1, 3) as usize); - - neighbor_lines.enumerate().for_each(|(idx, line)| { - let row = max(1, row - 1) + (idx as i32); - println!("{:2}: {}", row, line); - }); - - eprintln!(" {}^ here", " ".repeat(col as usize)); - eprintln!("Error at line {}, column {}.", row + 1, col + 1); - eprintln!("-----\n"); - }; - - match (&self.src, self.pos) { - (Some(src), Some(pos)) => { - pretty_printer(&src, pos); - } - _ => {}, - } - } } From 35f34e30c64c39a30a51d9999e742d212d4223d1 Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 23:30:08 +0900 Subject: [PATCH 43/55] [update] README.md --- README.md | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 721eec6..99d439c 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,46 @@ -# Parsergen +# Copager Rust製パーサジェネレータ ## Features +### Common + - `derive` +- `all` + +### Lex + +- `regexlex` : [crates/lex_regex](crates/lex_regex) + +### Parse + +- `lr1` : [crates/parse_lr1](crates/parse_lr1) + +### IR + +- `void` : [crates/ir_void](crates/ir_void) +- `sexp` : [crates/ir_sexp](crates/ir_sexp) ## Examples -[examples/expr.rs](examples/expr.rs) +[examples/oneshot](examples/oneshot) + +### ok ``` -$ cargo run --example expr -(10+20)/((30*40)-50) -Accepted : (Expr (Term (Term (Num "(" (Expr (Expr (Term (Num "10"))) "+" (Term (Num "20"))) ")")) "/" (Num "(" (Expr (Expr (Term (Num "(" (Expr (Term (Term (Num "30")) "*" (Num "40"))) 
")"))) "-" (Term (Num "50"))) ")"))) +$ echo "(10 * (20 + 30)) / (40 + 60)" | cargo run -p example_oneshot +(10 * (20 + 30)) / (40 + 60) = 5 +``` -$ cargo run --example expr -10** +### error + +``` +$ echo "(10 -)" | cargo run -p example_oneshot +Error: Unexpected token "BracketR" found ----- - 1: 10** - ^ here -Error at line 1, column 4. + 1: (10 - ) + ^ here +Found at line 1, column 7. ----- - -Rejected : Unexpected token "Mul" found ``` From 6e0363a7f0a0b67db9a14df56bce1a8d755dc46b Mon Sep 17 00:00:00 2001 From: NakagamiYuta Date: Sat, 7 Sep 2024 23:40:03 +0900 Subject: [PATCH 44/55] [update] examples/oneshot --- README.md | 14 +++++------ examples/oneshot/src/main.rs | 47 +----------------------------------- 2 files changed, 8 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 99d439c..def24ee 100644 --- a/README.md +++ b/README.md @@ -29,18 +29,18 @@ Rust製パーサジェネレータ ### ok ``` -$ echo "(10 * (20 + 30)) / (40 + 60)" | cargo run -p example_oneshot -(10 * (20 + 30)) / (40 + 60) = 5 +$ echo "10 * (20 + 30)" | cargo run -p example_oneshot +Success : (Expr (Term (Term (Num "10")) "*" (Num "(" (Expr (Expr (Term (Num "20"))) "+" (Term (Num "30"))) ")"))) ``` ### error ``` -$ echo "(10 -)" | cargo run -p example_oneshot +$ echo "(10 *)" | cargo run -p example_oneshot Error: Unexpected token "BracketR" found ----- - 1: (10 - ) - ^ here -Found at line 1, column 7. ------ + 1: (10 *) + ^ here +Found at line 1, column 6. +---- ``` diff --git a/examples/oneshot/src/main.rs b/examples/oneshot/src/main.rs index 4bbb9bb..8971e61 100644 --- a/examples/oneshot/src/main.rs +++ b/examples/oneshot/src/main.rs @@ -48,51 +48,6 @@ type MyLexer = RegexLexer; type MyParser = LR1; type MyProcessor = Processor; -fn eval(ir: &SExp<'_, ExprToken, ExprRule>) -> i32 { - macro_rules! match_atom { - ($term:expr, $($kind:pat => $block:expr),* $(,)?) => { - match $term { - SExp::Atom(token) => { - match token.kind { - $($kind => $block,)* - _ => unreachable!(), - } - } - _ => unreachable!(), - } - } - } - - match ir { - SExp::List { rule, elems } => { - match rule { - ExprRule::Expr if elems.len() == 1 => eval(&elems[0]), - ExprRule::Expr => { - let lhs = eval(&elems[0]); - let rhs = eval(&elems[2]); - match_atom!(elems[1], - ExprToken::Plus => lhs + rhs, - ExprToken::Minus => lhs - rhs, - ) - } - ExprRule::Term if elems.len() == 1 => eval(&elems[0]), - ExprRule::Term => { - let lhs = eval(&elems[0]); - let rhs = eval(&elems[2]); - match_atom!(elems[1], - ExprToken::Mul => lhs * rhs, - ExprToken::Div => lhs / rhs, - ) - } - ExprRule::Num if elems.len() == 1 => eval(&elems[0]), - ExprRule::Num => eval(&elems[1]), - - } - } - SExp::Atom(token) => token.as_str().parse().unwrap(), - } -} - fn main() -> anyhow::Result<()> { let mut input = String::new(); stdin().read_line(&mut input)?; @@ -101,7 +56,7 @@ fn main() -> anyhow::Result<()> { .build_lexer()? .build_parser()? 
From e643091da67f15bfbd0bc9775ddf823af9ca08c7 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 00:16:12 +0900
Subject: [PATCH 45/55] [add] Groundwork for the prebuild and load macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock                    | 22 +++++++++++++
 Cargo.toml                    |  3 ++
 crates/core_macros/Cargo.toml | 14 +++++++++
 crates/core_macros/src/lib.rs | 11 +++++++
 examples/prebuild/Cargo.toml  | 13 ++++++++
 examples/prebuild/build.rs    | 59 +++++++++++++++++++++++++++++++++++
 examples/prebuild/src/main.rs |  4 +++
 src/lib.rs                    |  1 +
 8 files changed, 127 insertions(+)
 create mode 100644 crates/core_macros/Cargo.toml
 create mode 100644 crates/core_macros/src/lib.rs
 create mode 100644 examples/prebuild/Cargo.toml
 create mode 100644 examples/prebuild/build.rs
 create mode 100644 examples/prebuild/src/main.rs

diff --git a/Cargo.lock b/Cargo.lock
index 3d26de8..eeb0e8c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -25,6 +25,7 @@ dependencies = [
  "copager",
  "copager_cfg",
  "copager_core",
+ "copager_core_macros",
 "copager_ir",
  "copager_ir_sexp",
  "copager_ir_void",
@@ -63,6 +64,17 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "copager_core_macros"
+version = "0.1.1"
+dependencies = [
+ "anyhow",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "thiserror",
+]
+
 [[package]]
 name = "copager_ir"
 version = "0.1.1"
@@ -206,6 +218,16 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "example_prebuild"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "copager",
+ "serde",
+ "thiserror",
+]
+
 [[package]]
 name = "half"
 version = "1.8.3"
diff --git a/Cargo.toml b/Cargo.toml
index adc9653..3c45e6e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2024"
 
 [dependencies]
 copager_core = { path = "./crates/core" }
+copager_core_macros = { path = "./crates/core_macros" }
 copager_cfg = { path = "./crates/cfg" }
 copager_lex = { path = "./crates/lex", optional = true }
 copager_lex_regex = { path = "./crates/lex_regex", optional = true }
@@ -43,6 +44,7 @@ resolver = "2"
 members = [
     # Copager
     "./crates/core",
+    "./crates/core_macros",
     "./crates/cfg",
     "./crates/lex",
     "./crates/lex_derive",
@@ -57,6 +59,7 @@ members = [
 
     # Examples
     "./examples/oneshot",
+    "./examples/prebuild",
 ]
 
 exclude = []
diff --git a/crates/core_macros/Cargo.toml b/crates/core_macros/Cargo.toml
new file mode 100644
index 0000000..fcb75d7
--- /dev/null
+++ b/crates/core_macros/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "copager_core_macros"
+version = "0.1.1"
+edition = "2021"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+proc-macro2 = "1.0"
+quote = "1.0"
+syn = { version = "2.0", features = ["full", "extra-traits"] }
+
+[lib]
+proc-macro = true
diff --git a/crates/core_macros/src/lib.rs b/crates/core_macros/src/lib.rs
new file mode 100644
index 0000000..3d28ee3
--- /dev/null
+++ b/crates/core_macros/src/lib.rs
@@ -0,0 +1,11 @@
+use proc_macro::TokenStream;
+
+#[proc_macro_attribute]
+pub fn prebuild(_attr: TokenStream, item: TokenStream) -> TokenStream {
+    item
+}
+
+#[proc_macro_attribute]
+pub fn load(_attr: TokenStream, item: TokenStream) -> TokenStream {
+    item
+}
diff --git a/examples/prebuild/Cargo.toml b/examples/prebuild/Cargo.toml
new file mode 100644
index 0000000..e5e4f47
--- /dev/null
+++ b/examples/prebuild/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "example_prebuild"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+copager = { path = "../..", features = ["derive", "regexlex", "lr1", "sexp"] }
+
+[build-dependencies]
+serde = { workspace = true }
+copager = { path = "../..", features = ["derive", "regexlex", "lr1"] }
diff --git a/examples/prebuild/build.rs b/examples/prebuild/build.rs
new file mode 100644
index 0000000..ae650f9
--- /dev/null
+++ b/examples/prebuild/build.rs
@@ -0,0 +1,59 @@
+use serde::{Deserialize, Serialize};
+
+use copager::lex::{LexSource, RegexLexer};
+use copager::parse::{ParseSource, LR1};
+use copager::prelude::*;
+use copager::{Grammar, Processor};
+
+#[derive(
+    Debug, Default, Copy, Clone, Hash, PartialEq, Eq,
+    LexSource, Serialize, Deserialize,
+)]
+enum ExprToken {
+    #[default]
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\*")]
+    Mul,
+    #[token(text = r"/")]
+    Div,
+    #[token(text = r"\(")]
+    BracketL,
+    #[token(text = r"\)")]
+    BracketR,
+    #[token(text = r"[1-9][0-9]*")]
+    Num,
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(
+    Debug, Default, Copy, Clone, Hash, PartialEq, Eq,
+    ParseSource, Serialize, Deserialize,
+)]
+enum ExprRule {
+    #[default]
+    #[rule("<expr> ::= <expr> Plus <term>")]
+    #[rule("<expr> ::= <expr> Minus <term>")]
+    #[rule("<expr> ::= <term>")]
+    Expr,
+    #[rule("<term> ::= <term> Mul <num>")]
+    #[rule("<term> ::= <term> Div <num>")]
+    #[rule("<term> ::= <num>")]
+    Term,
+    #[rule("<num> ::= BracketL <expr> BracketR")]
+    #[rule("<num> ::= Num")]
+    Num,
+}
+
+type MyGrammar = Grammar<ExprToken, ExprRule>;
+type MyLexer = RegexLexer<ExprToken>;
+type MyParser = LR1<ExprToken, ExprRule>;
+type MyProcessor = Processor<MyGrammar, MyLexer, MyParser>;
+
+#[copager::prebuild]
+fn main() {
+
+}
diff --git a/examples/prebuild/src/main.rs b/examples/prebuild/src/main.rs
new file mode 100644
index 0000000..c3ed21e
--- /dev/null
+++ b/examples/prebuild/src/main.rs
@@ -0,0 +1,4 @@
+#[copager::load]
+fn main() {
+
+}
diff --git a/src/lib.rs b/src/lib.rs
index d9e5f31..c7043ed 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,5 @@
 pub use copager_core::*;
+pub use copager_core_macros::*;
 pub use copager_cfg as cfg;
 
 pub mod lex {

From b21aee47b7af6fa5307455d5ab5805a10e3ed907 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 00:36:28 +0900
Subject: [PATCH 46/55] [update] examples/prebuild

---
 Cargo.lock                           | 11 ++++++
 examples/prebuild/Cargo.toml         |  6 ++-
 examples/prebuild/build.rs           | 57 +---------------------------
 examples/prebuild/grammar/Cargo.toml | 10 +++++
 examples/prebuild/grammar/src/lib.rs | 54 ++++++++++++++++++++++++++
 5 files changed, 81 insertions(+), 57 deletions(-)
 create mode 100644 examples/prebuild/grammar/Cargo.toml
 create mode 100644 examples/prebuild/grammar/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index eeb0e8c..2a8d55c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -221,6 +221,17 @@ dependencies = [
 [[package]]
 name = "example_prebuild"
 version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "copager",
+ "example_prebuild_grammar",
+ "serde",
+ "thiserror",
+]
+
+[[package]]
+name = "example_prebuild_grammar"
+version = "0.1.0"
 dependencies = [
  "anyhow",
  "copager",
diff --git a/examples/prebuild/Cargo.toml b/examples/prebuild/Cargo.toml
index e5e4f47..e006710 100644
--- a/examples/prebuild/Cargo.toml
+++ b/examples/prebuild/Cargo.toml
@@ -6,8 +6,10 @@ edition = "2021"
 [dependencies]
 anyhow = { workspace = true }
 thiserror = { workspace = true }
-copager = { path = "../..", features = ["derive", "regexlex", "lr1", "sexp"] }
+copager = { path = "../.." }
+grammar = { package = "example_prebuild_grammar", path = "./grammar" }
 
 [build-dependencies]
 serde = { workspace = true }
-copager = { path = "../..", features = ["derive", "regexlex", "lr1"] }
+copager = { path = "../.." }
+grammar = { package = "example_prebuild_grammar", path = "./grammar" }
diff --git a/examples/prebuild/build.rs b/examples/prebuild/build.rs
index ae650f9..0c1c036 100644
--- a/examples/prebuild/build.rs
+++ b/examples/prebuild/build.rs
@@ -1,59 +1,6 @@
-use serde::{Deserialize, Serialize};
-
-use copager::lex::{LexSource, RegexLexer};
-use copager::parse::{ParseSource, LR1};
-use copager::prelude::*;
-use copager::{Grammar, Processor};
-
-#[derive(
-    Debug, Default, Copy, Clone, Hash, PartialEq, Eq,
-    LexSource, Serialize, Deserialize,
-)]
-enum ExprToken {
-    #[default]
-    #[token(text = r"\+")]
-    Plus,
-    #[token(text = r"-")]
-    Minus,
-    #[token(text = r"\*")]
-    Mul,
-    #[token(text = r"/")]
-    Div,
-    #[token(text = r"\(")]
-    BracketL,
-    #[token(text = r"\)")]
-    BracketR,
-    #[token(text = r"[1-9][0-9]*")]
-    Num,
-    #[token(text = r"[ \t\n]+", ignored)]
-    _Whitespace,
-}
-
-#[derive(
-    Debug, Default, Copy, Clone, Hash, PartialEq, Eq,
-    ParseSource, Serialize, Deserialize,
-)]
-enum ExprRule {
-    #[default]
-    #[rule("<expr> ::= <expr> Plus <term>")]
-    #[rule("<expr> ::= <expr> Minus <term>")]
-    #[rule("<expr> ::= <term>")]
-    Expr,
-    #[rule("<term> ::= <term> Mul <num>")]
-    #[rule("<term> ::= <term> Div <num>")]
-    #[rule("<term> ::= <num>")]
-    Term,
-    #[rule("<num> ::= BracketL <expr> BracketR")]
-    #[rule("<num> ::= Num")]
-    Num,
-}
-
-type MyGrammar = Grammar<ExprToken, ExprRule>;
-type MyLexer = RegexLexer<ExprToken>;
-type MyParser = LR1<ExprToken, ExprRule>;
-type MyProcessor = Processor<MyGrammar, MyLexer, MyParser>;
+use grammar::MyProcessor;
 
 #[copager::prebuild]
 fn main() {
-
+    let processor = MyProcessor::new();
 }
diff --git a/examples/prebuild/grammar/Cargo.toml b/examples/prebuild/grammar/Cargo.toml
new file mode 100644
index 0000000..440c658
--- /dev/null
+++ b/examples/prebuild/grammar/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "example_prebuild_grammar"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = { workspace = true }
+thiserror = { workspace = true }
+serde = { workspace = true }
+copager = { path = "../../..", features = ["derive", "regexlex", "lr1", "sexp"] }
diff --git a/examples/prebuild/grammar/src/lib.rs b/examples/prebuild/grammar/src/lib.rs
new file mode 100644
index 0000000..c1de489
--- /dev/null
+++ b/examples/prebuild/grammar/src/lib.rs
@@ -0,0 +1,54 @@
+use serde::{Deserialize, Serialize};
+
+use copager::lex::{LexSource, RegexLexer};
+use copager::parse::{ParseSource, LR1};
+use copager::prelude::*;
+use copager::{Grammar, Processor};
+
+#[derive(
+    Debug, Default, Copy, Clone, Hash, PartialEq, Eq,
+    LexSource, Serialize, Deserialize,
+)]
+pub enum ExprToken {
+    #[default]
+    #[token(text = r"\+")]
+    Plus,
+    #[token(text = r"-")]
+    Minus,
+    #[token(text = r"\*")]
+    Mul,
+    #[token(text = r"/")]
+    Div,
+    #[token(text = r"\(")]
+    BracketL,
+    #[token(text = r"\)")]
+    BracketR,
+    #[token(text = r"[1-9][0-9]*")]
+    Num,
+    #[token(text = r"[ \t\n]+", ignored)]
+    _Whitespace,
+}
+
+#[derive(
+    Debug, Default, Copy, Clone, Hash, PartialEq, Eq,
+    ParseSource, Serialize, Deserialize,
+)]
+pub enum ExprRule {
+    #[default]
+    #[rule("<expr> ::= <expr> Plus <term>")]
+    #[rule("<expr> ::= <expr> Minus <term>")]
+    #[rule("<expr> ::= <term>")]
+    Expr,
+    #[rule("<term> ::= <term> Mul <num>")]
+    #[rule("<term> ::= <term> Div <num>")]
+    #[rule("<term> ::= <num>")]
+    Term,
+    #[rule("<num> ::= BracketL <expr> BracketR")]
+    #[rule("<num> ::= Num")]
+    Num,
+}
+
+pub type MyGrammar = Grammar<ExprToken, ExprRule>;
+pub type MyLexer = RegexLexer<ExprToken>;
+pub type MyParser = LR1<ExprToken, ExprRule>;
+pub type MyProcessor = Processor<MyGrammar, MyLexer, MyParser>;

From 168a436f6870f8079abc6e1d484550a0a7a21573 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 16:04:16 +0900
Subject: [PATCH 47/55] [add] Implement the copager::{prebuild, load} macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock                              |  6 ++++--
 crates/core_macros/src/impl.rs          |  2 ++
 crates/core_macros/src/impl/load.rs     | 25 +++++++++++++++++++++
 crates/core_macros/src/impl/prebuild.rs | 24 ++++++++++++++++++++
 crates/core_macros/src/lib.rs           | 23 ++++++++++++++++-----
 examples/prebuild/Cargo.toml            |  4 +++-
 examples/prebuild/build.rs              |  6 ++++--
 examples/prebuild/src/main.rs           | 17 ++++++++++++++-
 8 files changed, 96 insertions(+), 11 deletions(-)
 create mode 100644 crates/core_macros/src/impl.rs
 create mode 100644 crates/core_macros/src/impl/load.rs
 create mode 100644 crates/core_macros/src/impl/prebuild.rs

diff --git a/Cargo.lock b/Cargo.lock
index 2a8d55c..5185ce2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -226,6 +226,7 @@ dependencies = [
  "copager",
  "example_prebuild_grammar",
  "serde",
+ "serde_json",
  "thiserror",
 ]
 
@@ -367,11 +368,12 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.117"
+version = "1.0.128"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3"
+checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8"
 dependencies = [
  "itoa",
+ "memchr",
  "ryu",
  "serde",
 ]
diff --git a/crates/core_macros/src/impl.rs b/crates/core_macros/src/impl.rs
new file mode 100644
index 0000000..93d0603
--- /dev/null
+++ b/crates/core_macros/src/impl.rs
@@ -0,0 +1,2 @@
+pub(crate) mod prebuild;
+pub(crate) mod load;
diff --git a/crates/core_macros/src/impl/load.rs b/crates/core_macros/src/impl/load.rs
new file mode 100644
index 0000000..0c388a3
--- /dev/null
+++ b/crates/core_macros/src/impl/load.rs
@@ -0,0 +1,25 @@
+use proc_macro2::TokenStream;
+use quote::quote;
+use syn::ItemFn;
+
+pub fn proc_macro_impl_load(_args: TokenStream, ast: ItemFn) -> TokenStream {
+    let fn_visibility = ast.vis;
+    let fn_ident = ast.sig.ident;
+    let fn_args = ast.sig.inputs;
+    let fn_ret_type = ast.sig.output;
+    let fn_body = ast.block;
+
+    quote! {
+        fn #fn_ident () #fn_ret_type {
+            #fn_visibility fn __inner (#fn_args) #fn_ret_type {
+                #fn_body
+            }
+
+            let out_dir = std::env::var_os("OUT_DIR").unwrap();
+            let cache_path = std::path::Path::new(&out_dir).join("MyProcessor.cache");
+            let cache_body = std::fs::read_to_string(cache_path).unwrap();
+            let deserialized = serde_json::from_str(&cache_body).unwrap();
+            __inner(deserialized)
+        }
+    }
+}
diff --git a/crates/core_macros/src/impl/prebuild.rs b/crates/core_macros/src/impl/prebuild.rs
new file mode 100644
index 0000000..be6eae4
--- /dev/null
+++ b/crates/core_macros/src/impl/prebuild.rs
@@ -0,0 +1,24 @@
+use proc_macro2::TokenStream;
+use quote::quote;
+use syn::ItemFn;
+
+pub fn proc_macro_impl_prebuild(_args: TokenStream, ast: ItemFn) -> TokenStream {
+    let fn_visibility = ast.vis;
+    let fn_ident = ast.sig.ident;
+    let fn_args = ast.sig.inputs;
+    let fn_ret_type = ast.sig.output;
+    let fn_body = ast.block;
+
+    quote! {
+        fn #fn_ident () {
+            #fn_visibility fn __inner (#fn_args) #fn_ret_type {
+                #fn_body
+            }
+
+            let serialized = serde_json::to_string(&__inner()).unwrap();
+            let out_dir = std::env::var_os("OUT_DIR").unwrap();
+            let cache_path = std::path::Path::new(&out_dir).join("MyProcessor.cache");
+            std::fs::write(cache_path, serialized).unwrap();
+        }
+    }
+}
diff --git a/crates/core_macros/src/lib.rs b/crates/core_macros/src/lib.rs
index 3d28ee3..b9cf84d 100644
--- a/crates/core_macros/src/lib.rs
+++ b/crates/core_macros/src/lib.rs
@@ -1,11 +1,24 @@
-use proc_macro::TokenStream;
+mod r#impl;
+
+use proc_macro2::TokenStream;
+use syn::{parse_macro_input, ItemFn};
 
 #[proc_macro_attribute]
-pub fn prebuild(_attr: TokenStream, item: TokenStream) -> TokenStream {
-    item
+pub fn prebuild(
+    attr: proc_macro::TokenStream,
+    item: proc_macro::TokenStream,
+) -> proc_macro::TokenStream {
+    let args: TokenStream = attr.into();
+    let ast = parse_macro_input!(item as ItemFn);
+    r#impl::prebuild::proc_macro_impl_prebuild(args, ast).into()
 }
 
 #[proc_macro_attribute]
-pub fn load(_attr: TokenStream, item: TokenStream) -> TokenStream {
-    item
+pub fn load(
+    attr: proc_macro::TokenStream,
+    item: proc_macro::TokenStream,
+) -> proc_macro::TokenStream {
+    let args: TokenStream = attr.into();
+    let ast = parse_macro_input!(item as ItemFn);
+    r#impl::load::proc_macro_impl_load(args, ast).into()
 }
diff --git a/examples/prebuild/Cargo.toml b/examples/prebuild/Cargo.toml
index e006710..587e3e8 100644
--- a/examples/prebuild/Cargo.toml
+++ b/examples/prebuild/Cargo.toml
@@ -6,10 +6,12 @@ edition = "2021"
 [dependencies]
 anyhow = { workspace = true }
 thiserror = { workspace = true }
-copager = { path = "../.." }
+serde_json = "1.0.128"
+copager = { path = "../..", features = ["sexp"] }
 grammar = { package = "example_prebuild_grammar", path = "./grammar" }
 
 [build-dependencies]
 serde = { workspace = true }
+serde_json = "1.0.128"
 copager = { path = "../.." }
 grammar = { package = "example_prebuild_grammar", path = "./grammar" }
diff --git a/examples/prebuild/build.rs b/examples/prebuild/build.rs
index 0c1c036..69c88be 100644
--- a/examples/prebuild/build.rs
+++ b/examples/prebuild/build.rs
@@ -1,6 +1,8 @@
 use grammar::MyProcessor;
 
 #[copager::prebuild]
-fn main() {
-    let processor = MyProcessor::new();
+fn main() -> MyProcessor {
+    MyProcessor::new()
+        .prebuild_parser()
+        .unwrap()
 }
diff --git a/examples/prebuild/src/main.rs b/examples/prebuild/src/main.rs
index c3ed21e..6c2d5e6 100644
--- a/examples/prebuild/src/main.rs
+++ b/examples/prebuild/src/main.rs
@@ -1,4 +1,19 @@
+use std::io::stdin;
+
+use copager::ir::SExp;
+
+use grammar::MyProcessor;
+
 #[copager::load]
-fn main() {
+fn main(processor: MyProcessor) -> anyhow::Result<()> {
+    let mut input = String::new();
+    stdin().read_line(&mut input)?;
+
+    let sexp = processor
+        .build_lexer()?
+        .build_parser_by_cache()
+        .process::<SExp<_, _>>(&input)?;
+    println!("{}", sexp);
 
+    Ok(())
 }

From 0a77959656389236fd1c85de9e857eeb22494197 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 16:15:23 +0900
Subject: [PATCH 48/55] [add] Add a prebuild feature flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock                              | 1 -
 Cargo.toml                              | 4 +++-
 crates/core_macros/src/impl/load.rs     | 2 +-
 crates/core_macros/src/impl/prebuild.rs | 2 +-
 examples/prebuild/Cargo.toml            | 6 ++----
 src/lib.rs                              | 6 ++++++
 6 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5185ce2..f2f639e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -226,7 +226,6 @@ dependencies = [
  "copager",
  "example_prebuild_grammar",
  "serde",
- "serde_json",
  "thiserror",
 ]
 
diff --git a/Cargo.toml b/Cargo.toml
index 3c45e6e..3f21d94 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ version = "0.1.1"
 edition = "2024"
 
 [dependencies]
+serde_json = { version = "1.0.117", optional = true }
 copager_core = { path = "./crates/core" }
 copager_core_macros = { path = "./crates/core_macros" }
 copager_cfg = { path = "./crates/cfg" }
@@ -26,8 +27,9 @@ copager = { path = ".", features = ["derive", "all"] }
 [features]
 # common
 default = ["dep:copager_lex", "dep:copager_parse"]
+all = ["prebuild", "derive", "regexlex", "lr1", "void", "sexp"]
+prebuild = ["dep:serde_json"]
 derive = ["copager_lex/derive", "copager_parse/derive"]
-all = ["regexlex", "lr1", "void", "sexp"]
 
 # lex
 regexlex = ["dep:copager_lex_regex"]
diff --git a/crates/core_macros/src/impl/load.rs b/crates/core_macros/src/impl/load.rs
index 0c388a3..003a092 100644
--- a/crates/core_macros/src/impl/load.rs
+++ b/crates/core_macros/src/impl/load.rs
@@ -18,7 +18,7 @@ pub fn proc_macro_impl_load(_args: TokenStream, ast: ItemFn) -> TokenStream {
             let out_dir = std::env::var_os("OUT_DIR").unwrap();
             let cache_path = std::path::Path::new(&out_dir).join("MyProcessor.cache");
             let cache_body = std::fs::read_to_string(cache_path).unwrap();
-            let deserialized = serde_json::from_str(&cache_body).unwrap();
+            let deserialized = copager::prebuild::deserialize(&cache_body).unwrap();
             __inner(deserialized)
         }
     }
diff --git a/crates/core_macros/src/impl/prebuild.rs b/crates/core_macros/src/impl/prebuild.rs
index be6eae4..227bc6c 100644
--- a/crates/core_macros/src/impl/prebuild.rs
+++ b/crates/core_macros/src/impl/prebuild.rs
@@ -15,7 +15,7 @@ pub fn proc_macro_impl_prebuild(_args: TokenStream, ast: ItemFn) -> TokenStream
             #fn_body
         }
 
-        let serialized = serde_json::to_string(&__inner()).unwrap();
+        let serialized = copager::prebuild::serialize(&__inner()).unwrap();
         let out_dir = std::env::var_os("OUT_DIR").unwrap();
         let cache_path = std::path::Path::new(&out_dir).join("MyProcessor.cache");
         std::fs::write(cache_path, serialized).unwrap();
diff --git a/examples/prebuild/Cargo.toml b/examples/prebuild/Cargo.toml
index 587e3e8..c3ffa74 100644
--- a/examples/prebuild/Cargo.toml
+++ b/examples/prebuild/Cargo.toml
@@ -6,12 +6,10 @@ edition = "2021"
 [dependencies]
 anyhow = { workspace = true }
 thiserror = { workspace = true }
-serde_json = "1.0.128"
-copager = { path = "../..", features = ["sexp"] }
+copager = { path = "../..", features = ["prebuild", "sexp"] }
 grammar = { package = "example_prebuild_grammar", path = "./grammar" }
 
 [build-dependencies]
 serde = { workspace = true }
-serde_json = "1.0.128"
-copager = { path = "../.." }
+copager = { path = "../..", features = ["prebuild"] }
 grammar = { package = "example_prebuild_grammar", path = "./grammar" }
diff --git a/src/lib.rs b/src/lib.rs
index c7043ed..c3674e6 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,12 @@ pub use copager_core::*;
 pub use copager_core_macros::*;
 pub use copager_cfg as cfg;
 
+#[cfg(any(feature = "all", feature = "prebuild"))]
+pub mod prebuild {
+    pub use serde_json::to_string as serialize;
+    pub use serde_json::from_str as deserialize;
+}
+
 pub mod lex {
     pub use copager_lex::*;
     #[cfg(any(feature = "all", feature = "regexlex"))]

From c5079738c17835347f3abec0e0b9fce275700edf Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 16:19:19 +0900
Subject: [PATCH 49/55] [update] README.md

---
 README.md                     | 20 +++++++++-----------
 examples/prebuild/src/main.rs |  2 +-
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index def24ee..0c40792 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,9 @@ A parser generator written in Rust
 
 ### Common
 
-- `derive`
 - `all`
+- `derive`
+- `prebuild`
 
 ### Lex
 
@@ -25,23 +25,20 @@ A parser generator written in Rust
 ## Examples
 
-[examples/oneshot](examples/oneshot)
+### One-shot
 
-### ok
+[examples/oneshot](examples/oneshot)
 
 ```
 $ echo "10 * (20 + 30)" | cargo run -p example_oneshot
 Success : (Expr (Term (Term (Num "10")) "*" (Num "(" (Expr (Expr (Term (Num "20"))) "+" (Term (Num "30"))) ")")))
 ```
 
-### error
+### Pre-build
+
+[examples/prebuild](examples/prebuild)
 
 ```
-$ echo "(10 *)" | cargo run -p example_oneshot
-Error: Unexpected token "BracketR" found
------
- 1: (10 *)
-         ^ here
-Found at line 1, column 6.
------
+$ echo "10 * (20 + 30)" | cargo run -p example_prebuild
+Success : (Expr (Term (Term (Num "10")) "*" (Num "(" (Expr (Expr (Term (Num "20"))) "+" (Term (Num "30"))) ")")))
 ```
diff --git a/examples/prebuild/src/main.rs b/examples/prebuild/src/main.rs
index 6c2d5e6..ccb8ee7 100644
--- a/examples/prebuild/src/main.rs
+++ b/examples/prebuild/src/main.rs
@@ -13,7 +13,7 @@ fn main(processor: MyProcessor) -> anyhow::Result<()> {
         .build_lexer()?
         .build_parser_by_cache()
         .process::<SExp<_, _>>(&input)?;
-    println!("{}", sexp);
+    println!("Success : {}", sexp);
 
     Ok(())
 }

From 2a8e37c5521377d31f76762be2b78091a8ebfa1b Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 16:30:07 +0900
Subject: [PATCH 50/55] [change] Change the conditions under which the
 prebuild_* and build_*_by_cache methods are provided
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/core/src/lib.rs | 108 +++++++++++++++++++++--------------------
 1 file changed, 55 insertions(+), 53 deletions(-)

diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index ecf9e59..d60a5d9 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -78,64 +78,87 @@ where
         }
     }
 
-    pub fn prebuild_lexer(self) -> anyhow::Result<Self>
+    pub fn build_lexer(self) -> anyhow::Result<Self>
     where
         G::Lex: Default,
-        Dl: Cacheable<G::Lex>,
     {
-        self.prebuild_lexer_by(G::Lex::default())
+        self.build_lexer_by(G::Lex::default())
     }
 
-    pub fn prebuild_lexer_by(mut self, source: G::Lex) -> anyhow::Result<Self>
+    pub fn build_lexer_by(mut self, source: G::Lex) -> anyhow::Result<Self>
     where
-        Dl: Cacheable<G::Lex>,
+        G::Lex: Default,
     {
         assert!(self.cache_lex.is_none());
 
-        let cache_lex = Dl::new(source)?;
-        self.cache_lex = Some(to_vec_packed(&cache_lex)?);
+        let lexer = Dl::try_from(source)?;
+        self.lexer = Some(lexer);
 
         Ok(self)
     }
 
-    pub fn prebuild_parser(self) -> anyhow::Result<Self>
+    pub fn build_parser(self) -> anyhow::Result<Self>
     where
         G::Lex: Default,
         G::Parse: Default,
-        Dp: Cacheable<(G::Lex, G::Parse)>,
     {
-        self.prebuild_parser_by((G::Lex::default(), G::Parse::default()))
+        self.build_parser_by((G::Lex::default(), G::Parse::default()))
     }
 
-    pub fn prebuild_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result<Self>
+    pub fn build_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result<Self>
     where
         G::Lex: Default,
         G::Parse: Default,
-        Dp: Cacheable<(G::Lex, G::Parse)>,
     {
         assert!(self.cache_parse.is_none());
 
-        let cache_parse = Dp::new(source)?;
-        self.cache_parse = Some(to_vec_packed(&cache_parse)?);
+        let parser = Dp::try_from(source)?;
+        self.parser = Some(parser);
 
         Ok(self)
     }
 
-    pub fn build_lexer(self) -> anyhow::Result<Self>
+    pub fn process<'input, I>(&self, input: &'input str) -> anyhow::Result<I>
     where
-        G::Lex: Default,
+        I: IR<'input, G::Lex, G::Parse>,
     {
-        self.build_lexer_by(G::Lex::default())
+        assert!(self.lexer.is_some());
+        assert!(self.parser.is_some());
+
+        let lexer = self.lexer.as_ref().unwrap();
+        let parser = self.parser.as_ref().unwrap();
+
+        let mut ir_builder = I::Builder::new();
+        for result in parser.run(lexer.run(input)) {
+            match result {
+                ParseEvent::Read(token) => ir_builder.on_read(token)?,
+                ParseEvent::Parse { rule, len } => ir_builder.on_parse(rule, len)?,
+                ParseEvent::Err(err) => return Err(err),
+            }
+        }
+
+        ir_builder.build()
     }
+}
 
-    pub fn build_lexer_by(mut self, source: G::Lex) -> anyhow::Result<Self>
+impl<G, Dl, Dp> Processor<G, Dl, Dp>
+where
+    G: GrammarDesign,
+    Dl: LexDriver<G::Lex> + Cacheable<G::Lex>,
+    Dp: ParseDriver<G::Lex, G::Parse>,
+{
+    pub fn prebuild_lexer(self) -> anyhow::Result<Self>
     where
         G::Lex: Default,
     {
+        self.prebuild_lexer_by(G::Lex::default())
+    }
+
+    pub fn prebuild_lexer_by(mut self, source: G::Lex) -> anyhow::Result<Self> {
         assert!(self.cache_lex.is_none());
 
-        let lexer = Dl::try_from(source)?;
-        self.lexer = Some(lexer);
+        let cache_lex = Dl::new(source)?;
+        self.cache_lex = Some(to_vec_packed(&cache_lex)?);
 
         Ok(self)
     }
@@ -143,7 +166,6 @@
     pub fn build_lexer_by_cache(mut self) -> Self
     where
         G::Lex: Default,
-        Dl: Cacheable<G::Lex>,
     {
         assert!(self.lexer.is_some());
 
@@ -154,24 +176,27 @@ where
 
         self
     }
+}
 
-    pub fn build_parser(self) -> anyhow::Result<Self>
+impl<G, Dl, Dp> Processor<G, Dl, Dp>
+where
+    G: GrammarDesign,
+    Dl: LexDriver<G::Lex>,
+    Dp: ParseDriver<G::Lex, G::Parse> + Cacheable<(G::Lex, G::Parse)>,
+{
+    pub fn prebuild_parser(self) -> anyhow::Result<Self>
     where
         G::Lex: Default,
         G::Parse: Default,
     {
-        self.build_parser_by((G::Lex::default(), G::Parse::default()))
+        self.prebuild_parser_by((G::Lex::default(), G::Parse::default()))
     }
 
-    pub fn build_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result<Self>
-    where
-        G::Lex: Default,
-        G::Parse: Default,
-    {
+    pub fn prebuild_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result<Self> {
         assert!(self.cache_parse.is_none());
 
-        let parser = Dp::try_from(source)?;
-        self.parser = Some(parser);
+        let cache_parse = Dp::new(source)?;
+        self.cache_parse = Some(to_vec_packed(&cache_parse)?);
 
         Ok(self)
     }
@@ -180,7 +205,6 @@ where
     where
         G::Lex: Default,
         G::Parse: Default,
-        Dp: Cacheable<(G::Lex, G::Parse)>,
     {
         assert!(self.parser.is_none());
 
@@ -191,26 +215,4 @@ where
 
         self
     }
-
-    pub fn process<'input, I>(&self, input: &'input str) -> anyhow::Result<I>
-    where
-        I: IR<'input, G::Lex, G::Parse>,
-    {
-        assert!(self.lexer.is_some());
-        assert!(self.parser.is_some());
-
-        let lexer = self.lexer.as_ref().unwrap();
-        let parser = self.parser.as_ref().unwrap();
-
-        let mut ir_builder = I::Builder::new();
-        for result in parser.run(lexer.run(input)) {
-            match result {
-                ParseEvent::Read(token) => ir_builder.on_read(token)?,
-                ParseEvent::Parse { rule, len } => ir_builder.on_parse(rule, len)?,
-                ParseEvent::Err(err) => return Err(err),
-            }
-        }
-
-        ir_builder.build()
-    }
 }

From b663177d76cb43499984320d7175aec334074152 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 16:38:08 +0900
Subject: [PATCH 51/55] [change] Remove the asserts from Processor's methods
 & drop the redundant where clauses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/core/src/lib.rs | 37 ++++---------------------------------
 1 file changed, 4 insertions(+), 33 deletions(-)

diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index d60a5d9..a7571f7 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -85,12 +85,7 @@ where
         self.build_lexer_by(G::Lex::default())
     }
 
-    pub fn build_lexer_by(mut self, source: G::Lex) -> anyhow::Result<Self>
-    where
-        G::Lex: Default,
-    {
-        assert!(self.cache_lex.is_none());
-
+    pub fn build_lexer_by(mut self, source: G::Lex) -> anyhow::Result<Self> {
         let lexer = Dl::try_from(source)?;
         self.lexer = Some(lexer);
 
@@ -105,13 +100,7 @@ where
         self.build_parser_by((G::Lex::default(), G::Parse::default()))
     }
 
-    pub fn build_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result<Self>
-    where
-        G::Lex: Default,
-        G::Parse: Default,
-    {
-        assert!(self.cache_parse.is_none());
-
+    pub fn build_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result<Self> {
         let parser = Dp::try_from(source)?;
         self.parser = Some(parser);
 
@@ -122,9 +111,6 @@ where
     where
         I: IR<'input, G::Lex, G::Parse>,
     {
-        assert!(self.lexer.is_some());
-        assert!(self.parser.is_some());
-
         let lexer = self.lexer.as_ref().unwrap();
         let parser = self.parser.as_ref().unwrap();
 
@@ -155,20 +141,13 @@ where
     }
 
     pub fn prebuild_lexer_by(mut self, source: G::Lex) -> anyhow::Result<Self> {
-        assert!(self.cache_lex.is_none());
-
         let cache_lex = Dl::new(source)?;
         self.cache_lex = Some(to_vec_packed(&cache_lex)?);
 
         Ok(self)
     }
 
-    pub fn build_lexer_by_cache(mut self) -> Self
-    where
-        G::Lex: Default,
-    {
-        assert!(self.lexer.is_some());
-
+    pub fn build_lexer_by_cache(mut self) -> Self {
         let cache_lex = self.cache_lex.as_ref().unwrap();
         let cache_lex = from_slice(cache_lex);
         let lexer = Dl::restore(cache_lex.unwrap());
@@ -193,21 +172,13 @@ where
     }
 
     pub fn prebuild_parser_by(mut self, source: (G::Lex, G::Parse)) -> anyhow::Result<Self> {
-        assert!(self.cache_parse.is_none());
-
         let cache_parse = Dp::new(source)?;
         self.cache_parse = Some(to_vec_packed(&cache_parse)?);
 
         Ok(self)
     }
 
-    pub fn build_parser_by_cache(mut self) -> Self
-    where
-        G::Lex: Default,
-        G::Parse: Default,
-    {
-        assert!(self.parser.is_none());
-
+    pub fn build_parser_by_cache(mut self) -> Self {
         let cache_parse = self.cache_parse.as_ref().unwrap();
         let cache_parse = from_slice(cache_parse);
         let parser = Dp::restore(cache_parse.unwrap());

From d6c691c13ecb49dcb5448920b39eb3dbb77a8dc2 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 17:17:22 +0900
Subject: [PATCH 52/55] [fix] Fix copager::load not working when the binary
 is run outside of cargo run
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/core_macros/src/impl/load.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/crates/core_macros/src/impl/load.rs b/crates/core_macros/src/impl/load.rs
index 003a092..87779a6 100644
--- a/crates/core_macros/src/impl/load.rs
+++ b/crates/core_macros/src/impl/load.rs
@@ -15,9 +15,7 @@ pub fn proc_macro_impl_load(_args: TokenStream, ast: ItemFn) -> TokenStream {
             #fn_body
         }
 
-        let out_dir = std::env::var_os("OUT_DIR").unwrap();
-        let cache_path = std::path::Path::new(&out_dir).join("MyProcessor.cache");
-        let cache_body = std::fs::read_to_string(cache_path).unwrap();
+        let cache_body = include_str!(concat!(env!("OUT_DIR"), "/MyProcessor.cache"));
         let deserialized = copager::prebuild::deserialize(&cache_body).unwrap();
         __inner(deserialized)
     }

From 2579c523d4869697adaf1466db82b463ba7e6227 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 19:03:00 +0900
Subject: [PATCH 53/55] [update] Change how widely the core_macros crate is
 exposed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/lib.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index c3674e6..de1cfba 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,9 @@
 pub use copager_core::*;
-pub use copager_core_macros::*;
 pub use copager_cfg as cfg;
 
+#[cfg(any(feature = "all", feature = "prebuild"))]
+pub use copager_core_macros::*;
+
 #[cfg(any(feature = "all", feature = "prebuild"))]
 pub mod prebuild {
     pub use serde_json::to_string as serialize;

From 4938f1d8ecf69f5994db7d28d30f379e9e6df990 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 19:09:14 +0900
Subject: [PATCH 54/55] [update] Bump the versions specified in Cargo.toml
 to 0.2.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since this round made sweeping changes overall, bump every crate to 0.2.0
this time.

---
 Cargo.lock                     | 26 +++++++++++++-------------
 Cargo.toml                     |  4 ++--
 crates/cfg/Cargo.toml          |  2 +-
 crates/core/Cargo.toml         |  2 +-
 crates/core_macros/Cargo.toml  |  2 +-
 crates/ir/Cargo.toml           |  2 +-
 crates/ir_sexp/Cargo.toml      |  2 +-
 crates/ir_void/Cargo.toml      |  2 +-
 crates/lex/Cargo.toml          |  2 +-
 crates/lex_derive/Cargo.toml   |  2 +-
 crates/lex_regex/Cargo.toml    |  2 +-
 crates/parse/Cargo.toml        |  2 +-
 crates/parse_derive/Cargo.toml |  2 +-
 crates/parse_lr1/Cargo.toml    |  2 +-
 14 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index f2f639e..7768a14 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -19,7 +19,7 @@ checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3"
 
 [[package]]
 name = "copager"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager",
@@ -39,7 +39,7 @@ dependencies = [
 
 [[package]]
 name = "copager_cfg"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "thiserror",
 ]
 
 [[package]]
 name = "copager_core"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
@@ -66,7 +66,7 @@ dependencies = [
 
 [[package]]
 name = "copager_core_macros"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "proc-macro2",
@@ -77,7 +77,7 @@ dependencies = [
 
 [[package]]
 name = "copager_ir"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
@@ -88,7 +88,7 @@ dependencies = [
 
 [[package]]
 name = "copager_ir_sexp"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
@@ -103,7 +103,7 @@ dependencies = [
 
 [[package]]
 name = "copager_ir_void"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
@@ -115,7 +115,7 @@ dependencies = [
 
 [[package]]
 name = "copager_lex"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
@@ -126,7 +126,7 @@ dependencies = [
 
 [[package]]
 name = "copager_lex_derive"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
@@ -139,7 +139,7 @@ dependencies = [
 
 [[package]]
 name = "copager_lex_regex"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
@@ -153,7 +153,7 @@ dependencies = [
 
 [[package]]
 name = "copager_parse"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
@@ -165,7 +165,7 @@ dependencies = [
 
 [[package]]
 name = "copager_parse_derive"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
@@ -179,7 +179,7 @@ dependencies = [
 
 [[package]]
 name = "copager_parse_lr1"
-version = "0.1.1"
+version = "0.2.0"
 dependencies = [
  "anyhow",
  "copager_cfg",
diff --git a/Cargo.toml b/Cargo.toml
index 3f21d94..61469d6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@ cargo-features = ["edition2024"]
 
 [package]
 name = "copager"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2024"
 
 [dependencies]
@@ -22,7 +22,7 @@ copager_ir_sexp = { path = "./crates/ir_sexp", optional = true }
 anyhow = { workspace = true }
 serde = { workspace = true }
 serde_json = "1.0.117"
-copager = { path = ".", features = ["derive", "all"] }
+copager = { path = ".", features = ["all"] }
 
 [features]
 # common
diff --git a/crates/cfg/Cargo.toml b/crates/cfg/Cargo.toml
index b710730..220fec0 100644
--- a/crates/cfg/Cargo.toml
+++ b/crates/cfg/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_cfg"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index bf8c539..6d8bf34 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_core"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/core_macros/Cargo.toml b/crates/core_macros/Cargo.toml
index fcb75d7..d33518c 100644
--- a/crates/core_macros/Cargo.toml
+++ b/crates/core_macros/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_core_macros"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/ir/Cargo.toml b/crates/ir/Cargo.toml
index ac192cb..c05bab9 100644
--- a/crates/ir/Cargo.toml
+++ b/crates/ir/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_ir"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/ir_sexp/Cargo.toml b/crates/ir_sexp/Cargo.toml
index fc26a85..b3e15ca 100644
--- a/crates/ir_sexp/Cargo.toml
+++ b/crates/ir_sexp/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_ir_sexp"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/ir_void/Cargo.toml b/crates/ir_void/Cargo.toml
index 66bb712..fcc2038 100644
--- a/crates/ir_void/Cargo.toml
+++ b/crates/ir_void/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_ir_void"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/lex/Cargo.toml b/crates/lex/Cargo.toml
index 4f8c3f6..df17694 100644
--- a/crates/lex/Cargo.toml
+++ b/crates/lex/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_lex"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/lex_derive/Cargo.toml b/crates/lex_derive/Cargo.toml
index dde1eb2..6c9cabd 100644
--- a/crates/lex_derive/Cargo.toml
+++ b/crates/lex_derive/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_lex_derive"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/lex_regex/Cargo.toml b/crates/lex_regex/Cargo.toml
index e08ee94..5e07b94 100644
--- a/crates/lex_regex/Cargo.toml
+++ b/crates/lex_regex/Cargo.toml
@@ -2,7 +2,7 @@ cargo-features = ["edition2024"]
 
 [package]
 name = "copager_lex_regex"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2024"
 
 [dependencies]
diff --git a/crates/parse/Cargo.toml b/crates/parse/Cargo.toml
index 1d13812..7863532 100644
--- a/crates/parse/Cargo.toml
+++ b/crates/parse/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_parse"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/parse_derive/Cargo.toml b/crates/parse_derive/Cargo.toml
index f923437..dc2fd69 100644
--- a/crates/parse_derive/Cargo.toml
+++ b/crates/parse_derive/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "copager_parse_derive"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2021"
 
 [dependencies]
diff --git a/crates/parse_lr1/Cargo.toml b/crates/parse_lr1/Cargo.toml
index dee2c56..3d628d4 100644
--- a/crates/parse_lr1/Cargo.toml
+++ b/crates/parse_lr1/Cargo.toml
@@ -2,7 +2,7 @@ cargo-features = ["edition2024"]
 
 [package]
 name = "copager_parse_lr1"
-version = "0.1.1"
+version = "0.2.0"
 edition = "2024"
 
 [dependencies]

From 6cd9bffeb33483cfbc761710f7e0b233bcb6a201 Mon Sep 17 00:00:00 2001
From: NakagamiYuta
Date: Sun, 8 Sep 2024 19:12:47 +0900
Subject: [PATCH 55/55] [change] Fix the #[cfg(feature = ...)] guards
 related to the all flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/lib.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index de1cfba..5b802dc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,10 +1,10 @@
 pub use copager_core::*;
 pub use copager_cfg as cfg;
 
-#[cfg(any(feature = "all", feature = "prebuild"))]
+#[cfg(feature = "prebuild")]
 pub use copager_core_macros::*;
 
-#[cfg(any(feature = "all", feature = "prebuild"))]
+#[cfg(feature = "prebuild")]
 pub mod prebuild {
     pub use serde_json::to_string as serialize;
     pub use serde_json::from_str as deserialize;
@@ -12,21 +12,21 @@ pub mod prebuild {
 
 pub mod lex {
     pub use copager_lex::*;
-    #[cfg(any(feature = "all", feature = "regexlex"))]
+    #[cfg(feature = "regexlex")]
     pub use copager_lex_regex::*;
 }
 
 pub mod parse {
     pub use copager_parse::*;
-    #[cfg(any(feature = "all", feature = "lr1"))]
+    #[cfg(feature = "lr1")]
     pub use copager_parse_lr1::*;
 }
 
 pub mod ir {
     pub use copager_ir::*;
-    #[cfg(any(feature = "all", feature = "void"))]
+    #[cfg(feature = "void")]
     pub use copager_ir_void::*;
-    #[cfg(any(feature = "all", feature = "sexp"))]
+    #[cfg(feature = "sexp")]
     pub use copager_ir_sexp::*;
 }
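
Taken together, patches 45 through 52 give the prebuild example its final shape: build.rs constructs the parser once at compile time and serializes it, and the binary embeds and restores that cache instead of rebuilding. A rough hand-expansion of the two attribute macros follows, reconstructed from the quote! bodies in crates/core_macros/src/impl/. This is a sketch only: `MyProcessor` is borrowed from the example_prebuild_grammar crate, and the `MyProcessor.cache` file name is the one hard-coded by both macros.

```rust
// build.rs: what #[copager::prebuild] on main() roughly expands to.
// The annotated fn becomes __inner; its return value is serialized via
// copager::prebuild::serialize (a re-export of serde_json::to_string)
// and written to $OUT_DIR/MyProcessor.cache during the build.
use grammar::MyProcessor;

fn main() {
    fn __inner() -> MyProcessor {
        MyProcessor::new().prebuild_parser().unwrap()
    }

    let serialized = copager::prebuild::serialize(&__inner()).unwrap();
    let out_dir = std::env::var_os("OUT_DIR").unwrap();
    let cache_path = std::path::Path::new(&out_dir).join("MyProcessor.cache");
    std::fs::write(cache_path, serialized).unwrap();
}
```

```rust
// src/main.rs: what #[copager::load] on main() roughly expands to after
// PATCH 52. include_str! embeds the cache into the binary at compile
// time, so nothing is read from OUT_DIR when the program actually runs;
// the deserialized processor is handed to the annotated fn.
use grammar::MyProcessor;

fn main() -> anyhow::Result<()> {
    fn __inner(processor: MyProcessor) -> anyhow::Result<()> {
        // ...body of the annotated fn...
        Ok(())
    }

    let cache_body = include_str!(concat!(env!("OUT_DIR"), "/MyProcessor.cache"));
    let deserialized = copager::prebuild::deserialize(&cache_body).unwrap();
    __inner(deserialized)
}
```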
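For reference, the line-window arithmetic that PATCH 42 moves into ParseError's Display impl, extracted into a standalone program. The 4-space caret indent is an assumption chosen to line up with the `"{:2}: "` prefix, which is what the README examples show; positions are 1-indexed (line, column), mirroring the pos field.

```rust
// Standalone replica of the pretty_print windowing from PATCH 42: print
// up to three source lines around the error row, then a caret under the
// offending column.
use std::cmp::{max, min};

fn pretty_print(input: &str, pos: (usize, usize)) {
    println!("-----");

    // Convert 1-indexed (line, column) to 0-indexed (row, col).
    let (row, col) = (pos.0 as i32 - 1, pos.1 as i32 - 1);

    // Window: start up to 2 lines above the error row, show at most 3 lines.
    let neighbor_lines = input
        .split('\n')
        .skip(max(0, row - 2) as usize)
        .take(min(row + 1, 3) as usize);

    for (idx, line) in neighbor_lines.enumerate() {
        // First printed label is 1 for rows 0..=2, and row - 1 afterwards.
        let label = max(1, row - 1) + idx as i32;
        println!("{:2}: {}", label, line);
    }

    // Four spaces compensate for the "{:2}: " prefix printed above.
    println!("    {}^ here", " ".repeat(col as usize));
    println!("Found at line {}, column {}.", pos.0, pos.1);
    println!("-----");
}

fn main() {
    // Reproduces the README error example: "(10 *)" fails on ')' at column 6.
    pretty_print("(10 *)", (1, 6));
}
```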
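A note on why PATCH 55 can drop the any(feature = "all", ...) form: in Cargo, a feature such as all = ["prebuild", "derive", "regexlex", "lr1", "void", "sexp"] (defined in PATCH 48's Cargo.toml) enables every feature it lists, so cfg(feature = "prebuild") is already true whenever all is active. A minimal sketch of the simplified gate, using the crate's own feature names:

```rust
// Because `all` transitively enables "prebuild", this single guard covers
// both `--features prebuild` and `--features all` builds.
#[cfg(feature = "prebuild")]
pub mod prebuild {
    pub use serde_json::to_string as serialize;
    pub use serde_json::from_str as deserialize;
}
```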