From 517ddb30a9127b399adca43dc163f0946af4d417 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 09:25:43 +0800 Subject: [PATCH 01/16] feat: implement the first version of parser Signed-off-by: tison --- Cargo.lock | 84 ++++++++++++++++++++++ spath/Cargo.toml | 2 + spath/src/expr.rs | 0 spath/src/lib.rs | 4 ++ spath/src/parser/error.rs | 17 +++++ spath/src/parser/mod.rs | 4 ++ spath/src/parser/range.rs | 44 ++++++++++++ spath/src/parser/token.rs | 143 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 298 insertions(+) create mode 100644 spath/src/expr.rs create mode 100644 spath/src/parser/error.rs create mode 100644 spath/src/parser/mod.rs create mode 100644 spath/src/parser/range.rs create mode 100644 spath/src/parser/token.rs diff --git a/Cargo.lock b/Cargo.lock index d9751bd..7a630a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bitflags" version = "2.6.0" @@ -149,6 +155,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "heck" version = "0.5.0" @@ -208,6 +220,12 @@ dependencies = [ "jiff-tzdb", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.169" @@ -232,6 +250,40 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "logos" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab6f536c1af4c7cc81edf73da1f8029896e7e1e16a219ef09b184e76a296f3db" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "189bbfd0b61330abea797e5e9276408f2edbe4f822d7ad08685d67419aafb34e" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebfe8e1a19049ddbfccbd14ac834b215e11b85b90bab0c2dba7c7b92fb5d5cba" +dependencies = [ + "logos-codegen", +] + [[package]] name = "memchr" version = "2.7.4" @@ -302,6 +354,21 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "0.38.42" @@ -321,6 +388,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "semver" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" + [[package]] name = "serde" version = "1.0.217" @@ -365,10 +438,12 @@ version = "0.0.1" dependencies = [ "insta", "jiff", + "logos", "num-cmp", "ordered-float", "serde_json", "thiserror", + "winnow", ] [[package]] @@ -505,6 +580,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39281189af81c07ec09db316b302a3e67bf9bd7cbf6c820b50e35fee9c2fa980" +dependencies = [ + "memchr", +] + [[package]] name = "winsafe" version = "0.0.19" diff --git a/spath/Cargo.toml b/spath/Cargo.toml index 4347fdf..b6f8584 100644 --- a/spath/Cargo.toml +++ b/spath/Cargo.toml @@ -34,9 +34,11 @@ json = ["dep:serde_json"] [dependencies] jiff = { version = "0.1.21" } +logos = { version = "0.15.0" } num-cmp = { version = "0.1.0" } ordered-float = { version = "4.6.0", features = ["num-cmp"] } thiserror = { version = "2.0.8" } +winnow = { version = "0.6.20" } # optional dependencies serde_json = { version = "1.0.133", optional = true } diff --git a/spath/src/expr.rs b/spath/src/expr.rs new file mode 100644 index 0000000..e69de29 diff --git a/spath/src/lib.rs b/spath/src/lib.rs index aed74ec..cf993f3 100644 --- a/spath/src/lib.rs +++ b/spath/src/lib.rs @@ -18,8 +18,12 @@ pub use value::*; mod error; pub use error::*; +mod expr; +pub use expr::*; + #[cfg(feature = "json")] mod json; +mod parser; #[cfg(test)] mod tests; diff --git a/spath/src/parser/error.rs b/spath/src/parser/error.rs new file mode 100644 index 0000000..408cf37 --- /dev/null +++ b/spath/src/parser/error.rs @@ -0,0 +1,17 @@ +use crate::parser::range::Range; + +#[derive(Debug, thiserror::Error)] +#[error("{message}")] +pub struct ParseError { + span: Range, + message: String, +} + +impl ParseError { + pub fn new(span: Range, message: impl Into) -> Self { + Self { + span, + message: message.into(), + } + } +} diff --git a/spath/src/parser/mod.rs b/spath/src/parser/mod.rs new file mode 100644 index 0000000..13aa4b0 --- /dev/null +++ b/spath/src/parser/mod.rs @@ -0,0 +1,4 @@ +mod error; +mod range; +mod token; + diff --git a/spath/src/parser/range.rs b/spath/src/parser/range.rs new file mode 100644 index 0000000..6ae935d --- /dev/null +++ b/spath/src/parser/range.rs @@ -0,0 +1,44 @@ +use std::fmt; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Range { + pub start: u32, + pub end: u32, +} + +impl Range { + pub fn start(&self) -> usize { + self.start as usize + } + + pub fn end(&self) -> usize { + self.end as usize + } +} + +impl fmt::Debug for Range { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}..{}", self.start, self.end) + } +} + +impl fmt::Display for Range { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}..{}", self.start, self.end) + } +} + +impl From for std::ops::Range { + fn from(range: Range) -> std::ops::Range { + (range.start as usize)..(range.end as usize) + } +} + +impl From> for Range { + fn from(range: std::ops::Range) -> Range { + Range { + start: range.start as u32, + end: range.end as u32, + } + } +} diff --git a/spath/src/parser/token.rs b/spath/src/parser/token.rs new file mode 100644 index 0000000..dd4f5fc --- /dev/null +++ b/spath/src/parser/token.rs @@ -0,0 +1,143 @@ +use crate::parser::error::ParseError; +use crate::parser::range::Range; +use logos::{Lexer, Logos}; +use std::fmt; + +#[derive(Clone, PartialEq, Eq)] +pub struct Token<'a> { + pub source: &'a str, + pub kind: TokenKind, + pub span: Range, +} + +impl fmt::Debug for Token<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}({:?})", self.kind, self.span) + } +} + +impl<'a> Token<'a> { + pub fn new_eoi(source: &'a str) -> Self { + Token { + source, + kind: TokenKind::EOI, + span: (source.len()..source.len()).into(), + } + } + + pub fn text(&self) -> &'a str { + &self.source[std::ops::Range::from(self.span)] + } +} + +pub struct Tokenizer<'a> { + source: &'a str, + lexer: Lexer<'a, TokenKind>, + eoi: bool, +} + +impl<'a> Tokenizer<'a> { + pub fn new(source: &'a str) -> Self { + Tokenizer { + source, + lexer: TokenKind::lexer(source), + eoi: false, + } + } +} + +impl<'a> Iterator for Tokenizer<'a> { + type Item = Result, ParseError>; + + fn next(&mut self) -> Option { + match self.lexer.next() { + Some(Err(_)) => { + let span = Range::from(self.lexer.span().start..self.source.len()); + let message = "failed to recognize the rest tokens"; + Some(Err(ParseError::new(span, message))) + } + Some(Ok(kind)) => Some(Ok(Token { + source: self.source, + kind, + span: self.lexer.span().into(), + })), + None => { + if !self.eoi { + self.eoi = true; + Some(Ok(Token::new_eoi(self.source))) + } else { + None + } + } + } + } +} + +#[allow(non_camel_case_types)] +#[derive(logos::Logos, Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum TokenKind { + EOI, + + #[regex(r"[ \t\r\n\f]+", logos::skip)] + Whitespace, + + #[regex(r#"[_a-zA-Z][_a-zA-Z0-9]*"#)] + Ident, + + #[regex(r#"'([^'\\]|\\.)*'"#)] + #[regex(r#""([^"\\]|\\.)*""#)] + LiteralString, + + #[regex(r"(-)?[0-9]+(_|[0-9])*")] + LiteralInteger, + + #[regex(r"(-)?[0-9]+[eE][+-]?[0-9]+")] + #[regex(r"(-)?[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?")] + LiteralFloat, + + // Symbols + #[token("=")] + #[token("==")] + Eq, + #[token("<>")] + #[token("!=")] + NotEq, + #[token("!")] + Not, + #[token("<")] + Lt, + #[token(">")] + Gt, + #[token("<=")] + Lte, + #[token(">=")] + Gte, + #[token("&&")] + And, + #[token("||")] + Or, + #[token("$")] + Dollar, + #[token("@")] + At, + #[token(".")] + Dot, + #[token("?")] + QuestionMark, + #[token("(")] + LParen, + #[token(")")] + RParen, + #[token("[")] + LBracket, + #[token("]")] + RBracket, + + // Keywords + #[token("FALSE", ignore(ascii_case))] + FALSE, + #[token("NULL", ignore(ascii_case))] + NULL, + #[token("TRUE", ignore(ascii_case))] + TRUE, +} From 0025d6d2ab311a1f2240470df5ca5ae6bea063ef Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 10:29:23 +0800 Subject: [PATCH 02/16] refactor json value converter Signed-off-by: tison --- Cargo.lock | 62 ++++++++++++++++ spath/Cargo.toml | 1 + spath/src/json.rs | 46 ------------ spath/src/json/mod.rs | 73 +++++++++++++++++++ ...tests__rfc_9535_example_convertion-10.snap | 6 ++ ..._tests__rfc_9535_example_convertion-2.snap | 6 ++ ..._tests__rfc_9535_example_convertion-3.snap | 6 ++ ..._tests__rfc_9535_example_convertion-4.snap | 6 ++ ..._tests__rfc_9535_example_convertion-5.snap | 6 ++ ..._tests__rfc_9535_example_convertion-6.snap | 6 ++ ...tests__rfc_9535_example_convertion-7.snap} | 4 +- ..._tests__rfc_9535_example_convertion-8.snap | 6 ++ ..._tests__rfc_9535_example_convertion-9.snap | 6 ++ ...n__tests__rfc_9535_example_convertion.snap | 6 ++ spath/src/json/tests.rs | 41 +++++++++++ spath/src/lib.rs | 5 +- spath/src/tests/mod.rs | 25 ------- spath/testdata/rfc-9535-example-1.json | 36 +++++++++ spath/testdata/rfc-9535-example-10.json | 1 + spath/testdata/rfc-9535-example-2.json | 4 + spath/testdata/rfc-9535-example-3.json | 4 + spath/testdata/rfc-9535-example-4.json | 1 + spath/testdata/rfc-9535-example-5.json | 1 + spath/testdata/rfc-9535-example-6.json | 4 + .../rfc-9535-example-7.json} | 0 spath/testdata/rfc-9535-example-8.json | 1 + spath/testdata/rfc-9535-example-9.json | 4 + 27 files changed, 293 insertions(+), 74 deletions(-) delete mode 100644 spath/src/json.rs create mode 100644 spath/src/json/mod.rs create mode 100644 spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-10.snap create mode 100644 spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-2.snap create mode 100644 spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-3.snap create mode 100644 spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-4.snap create mode 100644 spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-5.snap create mode 100644 spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-6.snap rename spath/src/{tests/snapshots/spath__tests__serde_json_to_variant.snap => json/snapshots/spath__json__tests__rfc_9535_example_convertion-7.snap} (58%) create mode 100644 spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-8.snap create mode 100644 spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-9.snap create mode 100644 spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion.snap create mode 100644 spath/src/json/tests.rs delete mode 100644 spath/src/tests/mod.rs create mode 100644 spath/testdata/rfc-9535-example-1.json create mode 100644 spath/testdata/rfc-9535-example-10.json create mode 100644 spath/testdata/rfc-9535-example-2.json create mode 100644 spath/testdata/rfc-9535-example-3.json create mode 100644 spath/testdata/rfc-9535-example-4.json create mode 100644 spath/testdata/rfc-9535-example-5.json create mode 100644 spath/testdata/rfc-9535-example-6.json rename spath/{src/tests/simple.json => testdata/rfc-9535-example-7.json} (100%) create mode 100644 spath/testdata/rfc-9535-example-8.json create mode 100644 spath/testdata/rfc-9535-example-9.json diff --git a/Cargo.lock b/Cargo.lock index 7a630a1..cb70aed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.18" @@ -161,6 +170,29 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "googletest" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce026f84cdd339bf71be01b24fe67470ee634282f68c1c4b563d00a9f002b05" +dependencies = [ + "googletest_macro", + "num-traits", + "regex", + "rustversion", +] + +[[package]] +name = "googletest_macro" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5070fa86976044fe2b004d874c10af5d1aed6d8f6a72ff93a6eb29cc87048bc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "heck" version = "0.5.0" @@ -354,6 +386,29 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + [[package]] name = "regex-syntax" version = "0.8.5" @@ -382,6 +437,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "rustversion" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" + [[package]] name = "ryu" version = "1.0.18" @@ -436,6 +497,7 @@ checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" name = "spath" version = "0.0.1" dependencies = [ + "googletest", "insta", "jiff", "logos", diff --git a/spath/Cargo.toml b/spath/Cargo.toml index b6f8584..9493bea 100644 --- a/spath/Cargo.toml +++ b/spath/Cargo.toml @@ -44,6 +44,7 @@ winnow = { version = "0.6.20" } serde_json = { version = "1.0.133", optional = true } [dev-dependencies] +googletest = { version = "0.13.0" } insta = { version = "1.41.1" } serde_json = { version = "1.0.133" } diff --git a/spath/src/json.rs b/spath/src/json.rs deleted file mode 100644 index 2bc2f2b..0000000 --- a/spath/src/json.rs +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2024 tison -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::Number; -use crate::Object; -use crate::Value; - -impl From for Value { - fn from(value: serde_json::Value) -> Self { - match value { - serde_json::Value::Null => Value::Null, - serde_json::Value::Bool(b) => Value::Bool(b), - serde_json::Value::Number(n) => { - if let Some(i) = n.as_i64() { - Value::Number(Number::I64(i)) - } else if let Some(u) = n.as_u64() { - Value::Number(Number::U64(u)) - } else { - // always possible - let n = n.as_f64().unwrap(); - Value::Number(Number::F64(n.into())) - } - } - serde_json::Value::String(s) => Value::String(s), - serde_json::Value::Array(a) => Value::Array(a.into_iter().map(Value::from).collect()), - serde_json::Value::Object(o) => { - let mut map = Object::new(); - for (k, v) in o { - map.insert(k, Value::from(v)); - } - Value::Object(map) - } - } - } -} diff --git a/spath/src/json/mod.rs b/spath/src/json/mod.rs new file mode 100644 index 0000000..c94ac0a --- /dev/null +++ b/spath/src/json/mod.rs @@ -0,0 +1,73 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::Number; +use crate::Value; + +use serde_json::Value as JsonValue; + +#[cfg(test)] +mod tests; + +impl From for Value { + fn from(value: JsonValue) -> Self { + match value { + JsonValue::Null => Value::Null, + JsonValue::Bool(b) => Value::Bool(b), + JsonValue::Number(n) => { + if let Some(i) = n.as_i64() { + Value::Number(Number::I64(i)) + } else if let Some(u) = n.as_u64() { + Value::Number(Number::U64(u)) + } else { + // always possible + let n = n.as_f64().unwrap(); + Value::Number(Number::F64(n.into())) + } + } + JsonValue::String(s) => Value::String(s), + JsonValue::Array(a) => Value::Array(a.into_iter().map(Value::from).collect()), + JsonValue::Object(o) => { + Value::Object(o.into_iter().map(|(k, v)| (k, Value::from(v))).collect()) + } + } + } +} + +impl From for JsonValue { + fn from(value: Value) -> Self { + match value { + Value::Null => JsonValue::Null, + Value::Bool(b) => JsonValue::Bool(b), + Value::Number(n) => match n { + Number::I64(i) => JsonValue::Number(i.into()), + Number::U64(u) => JsonValue::Number(u.into()), + Number::F64(f) => match serde_json::Number::from_f64(f.0) { + None => JsonValue::Null, + Some(n) => JsonValue::Number(n), + }, + }, + Value::String(s) => JsonValue::String(s), + Value::Timestamp(ts) => JsonValue::String(format!("{ts}")), + Value::Interval(sd) => JsonValue::String(format!("{sd}")), + Value::Binary(b) => JsonValue::String(String::from_utf8_lossy(&b).to_string()), + Value::Array(a) => JsonValue::Array(a.into_iter().map(JsonValue::from).collect()), + Value::Object(o) => JsonValue::Object( + o.into_iter() + .map(|(k, v)| (k, JsonValue::from(v))) + .collect(), + ), + } + } +} diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-10.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-10.snap new file mode 100644 index 0000000..1825cb3 --- /dev/null +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-10.snap @@ -0,0 +1,6 @@ +--- +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-10.json\")" +snapshot_kind: text +--- +{"a":null,"b":[null],"c":[{}],"null":1} diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-2.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-2.snap new file mode 100644 index 0000000..167c157 --- /dev/null +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-2.snap @@ -0,0 +1,6 @@ +--- +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-2.json\")" +snapshot_kind: text +--- +{"'":{"@":2},"o":{"j j":{"k.k":3}}} diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-3.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-3.snap new file mode 100644 index 0000000..81e57ad --- /dev/null +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-3.snap @@ -0,0 +1,6 @@ +--- +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-3.json\")" +snapshot_kind: text +--- +{"a":[5,3],"o":{"j":1,"k":2}} diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-4.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-4.snap new file mode 100644 index 0000000..09d34b8 --- /dev/null +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-4.snap @@ -0,0 +1,6 @@ +--- +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-4.json\")" +snapshot_kind: text +--- +['a','b'] diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-5.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-5.snap new file mode 100644 index 0000000..571f0cb --- /dev/null +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-5.snap @@ -0,0 +1,6 @@ +--- +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-5.json\")" +snapshot_kind: text +--- +['a','b'] diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-6.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-6.snap new file mode 100644 index 0000000..2553f0e --- /dev/null +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-6.snap @@ -0,0 +1,6 @@ +--- +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-6.json\")" +snapshot_kind: text +--- +{"arr":[2,3],"obj":{"x":'y'}} diff --git a/spath/src/tests/snapshots/spath__tests__serde_json_to_variant.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-7.snap similarity index 58% rename from spath/src/tests/snapshots/spath__tests__serde_json_to_variant.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-7.snap index f26af61..ba3b006 100644 --- a/spath/src/tests/snapshots/spath__tests__serde_json_to_variant.snap +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-7.snap @@ -1,6 +1,6 @@ --- -source: spath/src/tests/mod.rs -expression: value +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-7.json\")" snapshot_kind: text --- {"a":[3,5,1,2,4,6,{"b":'j'},{"b":'k'},{"b":{}},{"b":'kilo'}],"e":'f',"o":{"p":1,"q":2,"r":3,"s":5,"t":{"u":6}}} diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-8.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-8.snap new file mode 100644 index 0000000..dbaafec --- /dev/null +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-8.snap @@ -0,0 +1,6 @@ +--- +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-8.json\")" +snapshot_kind: text +--- +['a','b','c','d','e','f','g'] diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-9.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-9.snap new file mode 100644 index 0000000..46fb1c2 --- /dev/null +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-9.snap @@ -0,0 +1,6 @@ +--- +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-9.json\")" +snapshot_kind: text +--- +{"a":[5,3,[{"j":4},{"k":6}]],"o":{"j":1,"k":2}} diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion.snap new file mode 100644 index 0000000..2927bd7 --- /dev/null +++ b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion.snap @@ -0,0 +1,6 @@ +--- +source: spath/src/json/tests.rs +expression: "assert_testdata_identical(\"rfc-9535-example-1.json\")" +snapshot_kind: text +--- +{"store":{"bicycle":{"color":'red',"price":399},"book":[{"author":'Nigel Rees',"category":'reference',"price":8.95,"title":'Sayings of the Century'},{"author":'Evelyn Waugh',"category":'fiction',"price":12.99,"title":'Sword of Honour'},{"author":'Herman Melville',"category":'fiction',"isbn":'0-553-21311-3',"price":8.99,"title":'Moby Dick'},{"author":'J. R. R. Tolkien',"category":'fiction',"isbn":'0-395-19395-8',"price":22.99,"title":'The Lord of the Rings'}]}} diff --git a/spath/src/json/tests.rs b/spath/src/json/tests.rs new file mode 100644 index 0000000..97c16b3 --- /dev/null +++ b/spath/src/json/tests.rs @@ -0,0 +1,41 @@ +use crate::{manifest_dir, Value}; +use googletest::assert_that; +use googletest::matchers::eq; +use insta::assert_snapshot; + +use serde_json::Value as JsonValue; + +fn assert_testdata_identical(path: &str) -> String { + let path = manifest_dir().join("testdata").join(path); + let literal = std::fs::read_to_string(&path).unwrap(); + + let json_value = serde_json::from_str::(&literal).unwrap(); + let value = Value::from(json_value.clone()); + assert_that!(json_value, eq(&JsonValue::from(value.clone()))); + + format!("{:?}", value) +} + +#[test] +fn test_rfc_9535_example_convertion() { + // §1.5 Figure 1 + assert_snapshot!(assert_testdata_identical("rfc-9535-example-1.json")); + // §2.3.1.3 Example + assert_snapshot!(assert_testdata_identical("rfc-9535-example-2.json")); + // §2.3.2.3 Example + assert_snapshot!(assert_testdata_identical("rfc-9535-example-3.json")); + // §2.3.3.3 Example + assert_snapshot!(assert_testdata_identical("rfc-9535-example-4.json")); + // §2.3.4.3 Example + assert_snapshot!(assert_testdata_identical("rfc-9535-example-5.json")); + // §2.3.5.3 Example 1 + assert_snapshot!(assert_testdata_identical("rfc-9535-example-6.json")); + // §2.3.5.3 Example 2 + assert_snapshot!(assert_testdata_identical("rfc-9535-example-7.json")); + // §2.5.1.3 Example 1 + assert_snapshot!(assert_testdata_identical("rfc-9535-example-8.json")); + // §2.5.2.3 Example 2 + assert_snapshot!(assert_testdata_identical("rfc-9535-example-9.json")); + // §2.6.1 Example + assert_snapshot!(assert_testdata_identical("rfc-9535-example-10.json")); +} diff --git a/spath/src/lib.rs b/spath/src/lib.rs index cf993f3..710f6d0 100644 --- a/spath/src/lib.rs +++ b/spath/src/lib.rs @@ -26,4 +26,7 @@ mod json; mod parser; #[cfg(test)] -mod tests; +fn manifest_dir() -> std::path::PathBuf { + let dir = env!("CARGO_MANIFEST_DIR"); + std::path::PathBuf::from(dir).canonicalize().unwrap() +} diff --git a/spath/src/tests/mod.rs b/spath/src/tests/mod.rs deleted file mode 100644 index 6867236..0000000 --- a/spath/src/tests/mod.rs +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2024 tison -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use insta::assert_debug_snapshot; - -use crate::Value; - -#[cfg(feature = "json")] -#[test] -fn test_serde_json_to_variant() { - let value: serde_json::Value = serde_json::from_str(include_str!("simple.json")).unwrap(); - let value = Value::from(value); - assert_debug_snapshot!(value); -} diff --git a/spath/testdata/rfc-9535-example-1.json b/spath/testdata/rfc-9535-example-1.json new file mode 100644 index 0000000..9a4d3cf --- /dev/null +++ b/spath/testdata/rfc-9535-example-1.json @@ -0,0 +1,36 @@ +{ + "store": { + "book": [ + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 + }, + { + "category": "fiction", + "author": "Evelyn Waugh", + "title": "Sword of Honour", + "price": 12.99 + }, + { + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "isbn": "0-553-21311-3", + "price": 8.99 + }, + { + "category": "fiction", + "author": "J. R. R. Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-395-19395-8", + "price": 22.99 + } + ], + "bicycle": { + "color": "red", + "price": 399 + } + } +} diff --git a/spath/testdata/rfc-9535-example-10.json b/spath/testdata/rfc-9535-example-10.json new file mode 100644 index 0000000..f5ea64b --- /dev/null +++ b/spath/testdata/rfc-9535-example-10.json @@ -0,0 +1 @@ +{"a": null, "b": [null], "c": [{}], "null": 1} diff --git a/spath/testdata/rfc-9535-example-2.json b/spath/testdata/rfc-9535-example-2.json new file mode 100644 index 0000000..6c9a6b8 --- /dev/null +++ b/spath/testdata/rfc-9535-example-2.json @@ -0,0 +1,4 @@ +{ + "o": {"j j": {"k.k": 3}}, + "'": {"@": 2} +} diff --git a/spath/testdata/rfc-9535-example-3.json b/spath/testdata/rfc-9535-example-3.json new file mode 100644 index 0000000..02cbb64 --- /dev/null +++ b/spath/testdata/rfc-9535-example-3.json @@ -0,0 +1,4 @@ +{ + "o": {"j": 1, "k": 2}, + "a": [5, 3] +} diff --git a/spath/testdata/rfc-9535-example-4.json b/spath/testdata/rfc-9535-example-4.json new file mode 100644 index 0000000..7d2570c --- /dev/null +++ b/spath/testdata/rfc-9535-example-4.json @@ -0,0 +1 @@ +["a","b"] diff --git a/spath/testdata/rfc-9535-example-5.json b/spath/testdata/rfc-9535-example-5.json new file mode 100644 index 0000000..7d2570c --- /dev/null +++ b/spath/testdata/rfc-9535-example-5.json @@ -0,0 +1 @@ +["a","b"] diff --git a/spath/testdata/rfc-9535-example-6.json b/spath/testdata/rfc-9535-example-6.json new file mode 100644 index 0000000..6f12af8 --- /dev/null +++ b/spath/testdata/rfc-9535-example-6.json @@ -0,0 +1,4 @@ +{ + "obj": {"x": "y"}, + "arr": [2, 3] +} diff --git a/spath/src/tests/simple.json b/spath/testdata/rfc-9535-example-7.json similarity index 100% rename from spath/src/tests/simple.json rename to spath/testdata/rfc-9535-example-7.json diff --git a/spath/testdata/rfc-9535-example-8.json b/spath/testdata/rfc-9535-example-8.json new file mode 100644 index 0000000..1b72d36 --- /dev/null +++ b/spath/testdata/rfc-9535-example-8.json @@ -0,0 +1 @@ +["a", "b", "c", "d", "e", "f", "g"] diff --git a/spath/testdata/rfc-9535-example-9.json b/spath/testdata/rfc-9535-example-9.json new file mode 100644 index 0000000..eb76eef --- /dev/null +++ b/spath/testdata/rfc-9535-example-9.json @@ -0,0 +1,4 @@ +{ + "o": {"j": 1, "k": 2}, + "a": [5, 3, [{"j": 4}, {"k": 6}]] +} From 4e51d774e37f58cff5716d08950744b9213d2a7d Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 10:37:23 +0800 Subject: [PATCH 03/16] clippy Signed-off-by: tison --- spath/src/expr.rs | 13 ++++++++++ spath/src/json/mod.rs | 4 +-- ...ests__rfc_9535_example_conversion-10.snap} | 0 ...tests__rfc_9535_example_conversion-2.snap} | 0 ...tests__rfc_9535_example_conversion-3.snap} | 0 ...tests__rfc_9535_example_conversion-4.snap} | 0 ...tests__rfc_9535_example_conversion-5.snap} | 0 ...tests__rfc_9535_example_conversion-6.snap} | 0 ...tests__rfc_9535_example_conversion-7.snap} | 0 ...tests__rfc_9535_example_conversion-8.snap} | 0 ...tests__rfc_9535_example_conversion-9.snap} | 0 ...__tests__rfc_9535_example_conversion.snap} | 0 spath/src/json/tests.rs | 21 +++++++++++++--- spath/src/lib.rs | 1 - spath/src/parser/error.rs | 14 +++++++++++ spath/src/parser/mod.rs | 16 +++++++++++- spath/src/parser/range.rs | 14 +++++++++++ spath/src/parser/runner.rs | 21 ++++++++++++++++ spath/src/parser/token.rs | 25 ++++++++++++++++--- 19 files changed, 119 insertions(+), 10 deletions(-) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion-10.snap => spath__json__tests__rfc_9535_example_conversion-10.snap} (100%) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion-2.snap => spath__json__tests__rfc_9535_example_conversion-2.snap} (100%) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion-3.snap => spath__json__tests__rfc_9535_example_conversion-3.snap} (100%) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion-4.snap => spath__json__tests__rfc_9535_example_conversion-4.snap} (100%) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion-5.snap => spath__json__tests__rfc_9535_example_conversion-5.snap} (100%) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion-6.snap => spath__json__tests__rfc_9535_example_conversion-6.snap} (100%) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion-7.snap => spath__json__tests__rfc_9535_example_conversion-7.snap} (100%) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion-8.snap => spath__json__tests__rfc_9535_example_conversion-8.snap} (100%) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion-9.snap => spath__json__tests__rfc_9535_example_conversion-9.snap} (100%) rename spath/src/json/snapshots/{spath__json__tests__rfc_9535_example_convertion.snap => spath__json__tests__rfc_9535_example_conversion.snap} (100%) create mode 100644 spath/src/parser/runner.rs diff --git a/spath/src/expr.rs b/spath/src/expr.rs index e69de29..2d282e0 100644 --- a/spath/src/expr.rs +++ b/spath/src/expr.rs @@ -0,0 +1,13 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. diff --git a/spath/src/json/mod.rs b/spath/src/json/mod.rs index c94ac0a..2d2c24b 100644 --- a/spath/src/json/mod.rs +++ b/spath/src/json/mod.rs @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use serde_json::Value as JsonValue; + use crate::Number; use crate::Value; -use serde_json::Value as JsonValue; - #[cfg(test)] mod tests; diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-10.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-10.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-10.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-10.snap diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-2.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-2.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-2.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-2.snap diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-3.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-3.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-3.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-3.snap diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-4.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-4.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-4.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-4.snap diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-5.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-5.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-5.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-5.snap diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-6.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-6.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-6.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-6.snap diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-7.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-7.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-7.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-7.snap diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-8.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-8.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-8.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-8.snap diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-9.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-9.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion-9.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion-9.snap diff --git a/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion.snap b/spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion.snap similarity index 100% rename from spath/src/json/snapshots/spath__json__tests__rfc_9535_example_convertion.snap rename to spath/src/json/snapshots/spath__json__tests__rfc_9535_example_conversion.snap diff --git a/spath/src/json/tests.rs b/spath/src/json/tests.rs index 97c16b3..69908fb 100644 --- a/spath/src/json/tests.rs +++ b/spath/src/json/tests.rs @@ -1,10 +1,25 @@ -use crate::{manifest_dir, Value}; +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use googletest::assert_that; use googletest::matchers::eq; use insta::assert_snapshot; - use serde_json::Value as JsonValue; +use crate::manifest_dir; +use crate::Value; + fn assert_testdata_identical(path: &str) -> String { let path = manifest_dir().join("testdata").join(path); let literal = std::fs::read_to_string(&path).unwrap(); @@ -17,7 +32,7 @@ fn assert_testdata_identical(path: &str) -> String { } #[test] -fn test_rfc_9535_example_convertion() { +fn test_rfc_9535_example_conversion() { // §1.5 Figure 1 assert_snapshot!(assert_testdata_identical("rfc-9535-example-1.json")); // §2.3.1.3 Example diff --git a/spath/src/lib.rs b/spath/src/lib.rs index 710f6d0..6d51e68 100644 --- a/spath/src/lib.rs +++ b/spath/src/lib.rs @@ -19,7 +19,6 @@ mod error; pub use error::*; mod expr; -pub use expr::*; #[cfg(feature = "json")] mod json; diff --git a/spath/src/parser/error.rs b/spath/src/parser/error.rs index 408cf37..1e66fc2 100644 --- a/spath/src/parser/error.rs +++ b/spath/src/parser/error.rs @@ -1,3 +1,17 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use crate::parser::range::Range; #[derive(Debug, thiserror::Error)] diff --git a/spath/src/parser/mod.rs b/spath/src/parser/mod.rs index 13aa4b0..9819ad3 100644 --- a/spath/src/parser/mod.rs +++ b/spath/src/parser/mod.rs @@ -1,4 +1,18 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + mod error; mod range; +mod runner; mod token; - diff --git a/spath/src/parser/range.rs b/spath/src/parser/range.rs index 6ae935d..406653f 100644 --- a/spath/src/parser/range.rs +++ b/spath/src/parser/range.rs @@ -1,3 +1,17 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + use std::fmt; #[derive(Clone, Copy, PartialEq, Eq)] diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs new file mode 100644 index 0000000..ee8c6a5 --- /dev/null +++ b/spath/src/parser/runner.rs @@ -0,0 +1,21 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::parser::error::ParseError; +use crate::parser::token::Token; +use crate::parser::token::Tokenizer; + +pub fn run_tokenizer(source: &str) -> Result, ParseError> { + Tokenizer::new(source).collect::>() +} diff --git a/spath/src/parser/token.rs b/spath/src/parser/token.rs index dd4f5fc..02646cf 100644 --- a/spath/src/parser/token.rs +++ b/spath/src/parser/token.rs @@ -1,7 +1,24 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt; + +use logos::Lexer; +use logos::Logos; + use crate::parser::error::ParseError; use crate::parser::range::Range; -use logos::{Lexer, Logos}; -use std::fmt; #[derive(Clone, PartialEq, Eq)] pub struct Token<'a> { @@ -73,7 +90,7 @@ impl<'a> Iterator for Tokenizer<'a> { } } -#[allow(non_camel_case_types)] +#[allow(clippy::upper_case_acronyms)] #[derive(logos::Logos, Clone, Copy, Debug, PartialEq, Eq, Hash)] pub enum TokenKind { EOI, @@ -122,6 +139,8 @@ pub enum TokenKind { At, #[token(".")] Dot, + #[token("*")] + Asterisk, #[token("?")] QuestionMark, #[token("(")] From bacdf46c10e9fd35b4525b2286b1a9eb3de22b64 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 11:01:40 +0800 Subject: [PATCH 04/16] normalized path Signed-off-by: tison --- spath/src/expr.rs | 13 ------------- spath/src/lib.rs | 2 -- spath/src/parser/mod.rs | 1 + spath/src/parser/selector.rs | 24 ++++++++++++++++++++++++ spath/src/parser/token.rs | 2 ++ 5 files changed, 27 insertions(+), 15 deletions(-) delete mode 100644 spath/src/expr.rs create mode 100644 spath/src/parser/selector.rs diff --git a/spath/src/expr.rs b/spath/src/expr.rs deleted file mode 100644 index 2d282e0..0000000 --- a/spath/src/expr.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2024 tison -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. diff --git a/spath/src/lib.rs b/spath/src/lib.rs index 6d51e68..32cf1c9 100644 --- a/spath/src/lib.rs +++ b/spath/src/lib.rs @@ -18,8 +18,6 @@ pub use value::*; mod error; pub use error::*; -mod expr; - #[cfg(feature = "json")] mod json; mod parser; diff --git a/spath/src/parser/mod.rs b/spath/src/parser/mod.rs index 9819ad3..b0e0ba9 100644 --- a/spath/src/parser/mod.rs +++ b/spath/src/parser/mod.rs @@ -15,4 +15,5 @@ mod error; mod range; mod runner; +mod selector; mod token; diff --git a/spath/src/parser/selector.rs b/spath/src/parser/selector.rs new file mode 100644 index 0000000..761b39c --- /dev/null +++ b/spath/src/parser/selector.rs @@ -0,0 +1,24 @@ +#[derive(Debug, Clone)] +pub enum Selector { + /// §2.3.2 Wildcard Selector. + Asterisk, + /// §2.3.1 Name Selector. + Identifier { + /// The name of the selector. + name: String, + }, + /// §2.3.3 Index Selector. + Index { + /// The index of the selector. + index: i64, + }, + /// §2.3.4 Array Slice Selector. + Slice { + /// The start index of the slice, inclusive. Default to 0. + start: Option, + /// The end index of the slice, exclusive. Default to the length of the array. + end: Option, + /// The step to iterate the slice. Default to 1. + step: Option, + }, +} diff --git a/spath/src/parser/token.rs b/spath/src/parser/token.rs index 02646cf..31e1587 100644 --- a/spath/src/parser/token.rs +++ b/spath/src/parser/token.rs @@ -141,6 +141,8 @@ pub enum TokenKind { Dot, #[token("*")] Asterisk, + #[token(":")] + Colon, #[token("?")] QuestionMark, #[token("(")] From 42142cbf33d761398b0a228a3c03caf171631c38 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 11:13:35 +0800 Subject: [PATCH 05/16] impl parser Signed-off-by: tison --- spath/src/parser/{selector.rs => ast.rs} | 14 ++++++++++++++ spath/src/parser/mod.rs | 2 +- spath/src/parser/runner.rs | 5 +++++ 3 files changed, 20 insertions(+), 1 deletion(-) rename spath/src/parser/{selector.rs => ast.rs} (67%) diff --git a/spath/src/parser/selector.rs b/spath/src/parser/ast.rs similarity index 67% rename from spath/src/parser/selector.rs rename to spath/src/parser/ast.rs index 761b39c..34464ed 100644 --- a/spath/src/parser/selector.rs +++ b/spath/src/parser/ast.rs @@ -1,3 +1,17 @@ +#[derive(Debug, Clone)] +pub enum Segment { + /// §2.5.1 Child Segment. + Child { + /// The selectors of the child segment. + selector: Vec, + }, + /// §2.5.2 Descendant Segment. + Descendant { + /// The selectors of the descendant segment. + selector: Vec, + } +} + #[derive(Debug, Clone)] pub enum Selector { /// §2.3.2 Wildcard Selector. diff --git a/spath/src/parser/mod.rs b/spath/src/parser/mod.rs index b0e0ba9..800ab14 100644 --- a/spath/src/parser/mod.rs +++ b/spath/src/parser/mod.rs @@ -15,5 +15,5 @@ mod error; mod range; mod runner; -mod selector; +mod ast; mod token; diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index ee8c6a5..3066c8e 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -13,9 +13,14 @@ // limitations under the License. use crate::parser::error::ParseError; +use crate::parser::ast::Selector; use crate::parser::token::Token; use crate::parser::token::Tokenizer; pub fn run_tokenizer(source: &str) -> Result, ParseError> { Tokenizer::new(source).collect::>() } + +pub fn run_parser(source: &str) -> Result, ParseError> { + let tokens = run_tokenizer(source)?; +} From c72ffe09f8882e04bb17d8f6a9a004a1751c8c0e Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 11:58:39 +0800 Subject: [PATCH 06/16] handmade parser Signed-off-by: tison --- Cargo.lock | 10 ------ spath/Cargo.toml | 1 - spath/src/parser/ast.rs | 23 +++++++++++- spath/src/parser/error.rs | 18 ++++++---- spath/src/parser/mod.rs | 3 +- spath/src/parser/parse.rs | 71 ++++++++++++++++++++++++++++++++++++++ spath/src/parser/runner.rs | 12 +++++-- spath/src/parser/token.rs | 2 +- 8 files changed, 118 insertions(+), 22 deletions(-) create mode 100644 spath/src/parser/parse.rs diff --git a/Cargo.lock b/Cargo.lock index cb70aed..3ef8d59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -505,7 +505,6 @@ dependencies = [ "ordered-float", "serde_json", "thiserror", - "winnow", ] [[package]] @@ -642,15 +641,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winnow" -version = "0.6.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39281189af81c07ec09db316b302a3e67bf9bd7cbf6c820b50e35fee9c2fa980" -dependencies = [ - "memchr", -] - [[package]] name = "winsafe" version = "0.0.19" diff --git a/spath/Cargo.toml b/spath/Cargo.toml index 9493bea..5232ed9 100644 --- a/spath/Cargo.toml +++ b/spath/Cargo.toml @@ -38,7 +38,6 @@ logos = { version = "0.15.0" } num-cmp = { version = "0.1.0" } ordered-float = { version = "4.6.0", features = ["num-cmp"] } thiserror = { version = "2.0.8" } -winnow = { version = "0.6.20" } # optional dependencies serde_json = { version = "1.0.133", optional = true } diff --git a/spath/src/parser/ast.rs b/spath/src/parser/ast.rs index 34464ed..7d7a132 100644 --- a/spath/src/parser/ast.rs +++ b/spath/src/parser/ast.rs @@ -1,3 +1,24 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// A valid SPath expression. +#[derive(Debug, Clone)] +pub enum Expr { + /// Consists of a series of segments. + Segments { segments: Vec }, +} + #[derive(Debug, Clone)] pub enum Segment { /// §2.5.1 Child Segment. @@ -9,7 +30,7 @@ pub enum Segment { Descendant { /// The selectors of the descendant segment. selector: Vec, - } + }, } #[derive(Debug, Clone)] diff --git a/spath/src/parser/error.rs b/spath/src/parser/error.rs index 1e66fc2..70d37f7 100644 --- a/spath/src/parser/error.rs +++ b/spath/src/parser/error.rs @@ -17,15 +17,21 @@ use crate::parser::range::Range; #[derive(Debug, thiserror::Error)] #[error("{message}")] pub struct ParseError { - span: Range, + range: Range, message: String, } impl ParseError { - pub fn new(span: Range, message: impl Into) -> Self { - Self { - span, - message: message.into(), - } + pub fn new(range: Range, message: impl Into) -> Self { + let message = message.into(); + Self { range, message } + } + + pub fn empty() -> Self { + Self::new(Range { start: 0, end: 0 }, "empty input") + } + + pub fn unexpected_token(range: Range) -> Self { + Self::new(range, "unexpected token") } } diff --git a/spath/src/parser/mod.rs b/spath/src/parser/mod.rs index 800ab14..19c24d3 100644 --- a/spath/src/parser/mod.rs +++ b/spath/src/parser/mod.rs @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod ast; mod error; +mod parse; mod range; mod runner; -mod ast; mod token; diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs new file mode 100644 index 0000000..676fc02 --- /dev/null +++ b/spath/src/parser/parse.rs @@ -0,0 +1,71 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::parser::ast::Expr; +use crate::parser::ast::Segment; +use crate::parser::error::ParseError; +use crate::parser::token::Token; +use crate::parser::token::TokenKind; + +#[derive(Debug)] +pub struct Parser<'a> { + source: &'a str, + tokens: Vec>, + /// The index of the first unprocessed token in `self.tokens` + index: usize, +} + +impl<'a> Parser<'a> { + pub fn new(source: &'a str, tokens: Vec>) -> Self { + Self { + source, + tokens, + index: 0, + } + } + + pub fn parse(&mut self) -> Result { + let first = self.next_token(); + + match first.kind { + TokenKind::Dollar => {} // the first token must be '$' + TokenKind::EOI => return Err(ParseError::empty()), + _ => return Err(ParseError::unexpected_token(first.span)), + } + + let mut segments = vec![]; + while let Some(segment) = self.parse_segment()? { + segments.push(segment); + } + Ok(Expr::Segments { segments }) + } + + pub fn parse_segment(&mut self) -> Result, ParseError> { + let token = self.next_token(); + match token.kind { + TokenKind::EOI => Ok(None), + _ => Err(ParseError::unexpected_token(token.span)), + } + } + + fn next_token(&mut self) -> Token { + if self.index < self.tokens.len() { + let token = self.tokens[self.index]; + self.index += 1; + token + } else { + Token::new_eoi(self.source) + } + } +} diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index 3066c8e..b495fcb 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use crate::parser::ast::Expr; use crate::parser::error::ParseError; -use crate::parser::ast::Selector; +use crate::parser::parse::Parser; use crate::parser::token::Token; use crate::parser::token::Tokenizer; @@ -21,6 +22,13 @@ pub fn run_tokenizer(source: &str) -> Result, ParseError> { Tokenizer::new(source).collect::>() } -pub fn run_parser(source: &str) -> Result, ParseError> { +pub fn run_parser(source: &str) -> Result { let tokens = run_tokenizer(source)?; + Parser::new(source, tokens).parse() +} + +#[test] +fn test_run_parser() { + let expr = run_parser("$").unwrap(); + println!("{expr:?}"); } diff --git a/spath/src/parser/token.rs b/spath/src/parser/token.rs index 31e1587..3c6f922 100644 --- a/spath/src/parser/token.rs +++ b/spath/src/parser/token.rs @@ -20,7 +20,7 @@ use logos::Logos; use crate::parser::error::ParseError; use crate::parser::range::Range; -#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq)] pub struct Token<'a> { pub source: &'a str, pub kind: TokenKind, From 19811818f514683754b67bc8b5730b4f54a035b8 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 13:03:48 +0800 Subject: [PATCH 07/16] handmade more Signed-off-by: tison --- spath/src/parser/ast.rs | 6 ++-- spath/src/parser/parse.rs | 69 ++++++++++++++++++++++++++++++++++++-- spath/src/parser/runner.rs | 2 +- spath/src/parser/token.rs | 2 ++ 4 files changed, 73 insertions(+), 6 deletions(-) diff --git a/spath/src/parser/ast.rs b/spath/src/parser/ast.rs index 7d7a132..2cedc0f 100644 --- a/spath/src/parser/ast.rs +++ b/spath/src/parser/ast.rs @@ -24,19 +24,19 @@ pub enum Segment { /// §2.5.1 Child Segment. Child { /// The selectors of the child segment. - selector: Vec, + selectors: Vec, }, /// §2.5.2 Descendant Segment. Descendant { /// The selectors of the descendant segment. - selector: Vec, + selectors: Vec, }, } #[derive(Debug, Clone)] pub enum Selector { /// §2.3.2 Wildcard Selector. - Asterisk, + Wildcard, /// §2.3.1 Name Selector. Identifier { /// The name of the selector. diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index 676fc02..36ed300 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::parser::ast::Expr; use crate::parser::ast::Segment; +use crate::parser::ast::{Expr, Selector}; use crate::parser::error::ParseError; use crate::parser::token::Token; use crate::parser::token::TokenKind; @@ -38,6 +38,7 @@ impl<'a> Parser<'a> { pub fn parse(&mut self) -> Result { let first = self.next_token(); + // §2.2.1 (Root Identifier) Syntax match first.kind { TokenKind::Dollar => {} // the first token must be '$' TokenKind::EOI => return Err(ParseError::empty()), @@ -51,14 +52,78 @@ impl<'a> Parser<'a> { Ok(Expr::Segments { segments }) } - pub fn parse_segment(&mut self) -> Result, ParseError> { + fn parse_segment(&mut self) -> Result, ParseError> { let token = self.next_token(); match token.kind { TokenKind::EOI => Ok(None), + TokenKind::LBracket => { + let selectors = self.parse_bracketed_selector()?; + Ok(Some(Segment::Child { selectors })) + } + // TODO(tisonkun): handle ..(descendant segment), .identifier, and .* _ => Err(ParseError::unexpected_token(token.span)), } } + fn parse_bracketed_selector(&mut self) -> Result, ParseError> { + let mut selectors = vec![]; + let mut prev_comma = false; + loop { + let token = self.peek_token(); + match token.kind { + TokenKind::EOI | TokenKind::RBracket => { + let _consumed = self.next_token(); + return Ok(selectors); + } + TokenKind::Comma => { + if prev_comma || selectors.is_empty() { + return Err(ParseError::unexpected_token(token.span)); + } + prev_comma = true; + let _consumed = self.next_token(); + } + _ => { + if prev_comma || selectors.is_empty() { + prev_comma = false; + let selector = self.parse_selector()?; + selectors.push(selector); + } else { + return Err(ParseError::unexpected_token(token.span)); + } + } + } + } + } + + fn parse_selector(&mut self) -> Result { + let token = self.next_token(); + match token.kind { + TokenKind::Asterisk => Ok(Selector::Wildcard), + TokenKind::LiteralString => { + // TODO(tisonkun): unescape the string + let text = token.text(); + Ok(Selector::Identifier { + name: text.to_string(), + }) + } + TokenKind::LiteralInteger => { + // TODO(tisonkun): dispatch slice-selector + let text = token.text(); + let index = text.parse().unwrap(); + Ok(Selector::Index { index }) + } + _ => Err(ParseError::unexpected_token(token.span)), + } + } + + fn peek_token(&self) -> Token { + if self.index < self.tokens.len() { + self.tokens[self.index] + } else { + Token::new_eoi(self.source) + } + } + fn next_token(&mut self) -> Token { if self.index < self.tokens.len() { let token = self.tokens[self.index]; diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index b495fcb..7991575 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -29,6 +29,6 @@ pub fn run_parser(source: &str) -> Result { #[test] fn test_run_parser() { - let expr = run_parser("$").unwrap(); + let expr = run_parser(r#"$["name"][0]"#).unwrap(); println!("{expr:?}"); } diff --git a/spath/src/parser/token.rs b/spath/src/parser/token.rs index 3c6f922..47affae 100644 --- a/spath/src/parser/token.rs +++ b/spath/src/parser/token.rs @@ -143,6 +143,8 @@ pub enum TokenKind { Asterisk, #[token(":")] Colon, + #[token(",")] + Comma, #[token("?")] QuestionMark, #[token("(")] From 2a34f6538c9336e17859cddbdcab9b773e39dc6e Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 13:15:11 +0800 Subject: [PATCH 08/16] parse integer Signed-off-by: tison --- spath/Cargo.toml | 1 + spath/src/lib.rs | 1 + spath/src/parser/parse.rs | 9 +++++++-- spath/src/parser/runner.rs | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/spath/Cargo.toml b/spath/Cargo.toml index 5232ed9..307bcbf 100644 --- a/spath/Cargo.toml +++ b/spath/Cargo.toml @@ -30,6 +30,7 @@ repository.workspace = true rust-version.workspace = true [features] +default = ["json"] json = ["dep:serde_json"] [dependencies] diff --git a/spath/src/lib.rs b/spath/src/lib.rs index 32cf1c9..e569f3c 100644 --- a/spath/src/lib.rs +++ b/spath/src/lib.rs @@ -20,6 +20,7 @@ pub use error::*; #[cfg(feature = "json")] mod json; +#[allow(dead_code)] mod parser; #[cfg(test)] diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index 36ed300..049a20c 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -108,8 +108,7 @@ impl<'a> Parser<'a> { } TokenKind::LiteralInteger => { // TODO(tisonkun): dispatch slice-selector - let text = token.text(); - let index = text.parse().unwrap(); + let index = parse_integer(token)?; Ok(Selector::Index { index }) } _ => Err(ParseError::unexpected_token(token.span)), @@ -134,3 +133,9 @@ impl<'a> Parser<'a> { } } } + +fn parse_integer(token: Token) -> Result { + let text = token.text(); + text.parse() + .map_err(|err| ParseError::new(token.span, format!("{err}"))) +} diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index 7991575..3aabeb7 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -29,6 +29,6 @@ pub fn run_parser(source: &str) -> Result { #[test] fn test_run_parser() { - let expr = run_parser(r#"$["name"][0]"#).unwrap(); + let expr = run_parser(r#"$["name"][10]"#).unwrap(); println!("{expr:?}"); } From ecd3050584ba571f25adf5659813660dd279c825 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 13:24:03 +0800 Subject: [PATCH 09/16] parse string Signed-off-by: tison --- spath/src/parser/parse.rs | 86 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 5 deletions(-) diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index 049a20c..4021a3b 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -17,6 +17,7 @@ use crate::parser::ast::{Expr, Selector}; use crate::parser::error::ParseError; use crate::parser::token::Token; use crate::parser::token::TokenKind; +use std::iter::Peekable; #[derive(Debug)] pub struct Parser<'a> { @@ -100,11 +101,8 @@ impl<'a> Parser<'a> { match token.kind { TokenKind::Asterisk => Ok(Selector::Wildcard), TokenKind::LiteralString => { - // TODO(tisonkun): unescape the string - let text = token.text(); - Ok(Selector::Identifier { - name: text.to_string(), - }) + let name = parse_string(token)?; + Ok(Selector::Identifier { name }) } TokenKind::LiteralInteger => { // TODO(tisonkun): dispatch slice-selector @@ -139,3 +137,81 @@ fn parse_integer(token: Token) -> Result { text.parse() .map_err(|err| ParseError::new(token.span, format!("{err}"))) } + +fn parse_string(token: Token) -> Result { + let text = token.text(); + let mut chars = text.chars(); + + let quote = chars.next().expect("quote char always exist"); + if chars.next_back().map_or(true, |ch| ch != quote) { + return Err(ParseError::new(token.span, "mismatched quote")); + } + + let mut chars = chars.peekable(); + let mut output = String::new(); + + while let Some(c) = chars.next() { + if c == '\\' { + match chars.next() { + Some('b') => output.push('\u{0008}'), + Some('f') => output.push('\u{000C}'), + Some('n') => output.push('\n'), + Some('r') => output.push('\r'), + Some('t') => output.push('\t'), + Some('\\') => output.push('\\'), + Some('u') => output.push( + unescape_unicode(&mut chars) + .ok_or_else(|| ParseError::new(token.span, "invalid escape sequence"))?, + ), + Some('x') => output.push( + unescape_byte(&mut chars) + .ok_or_else(|| ParseError::new(token.span, "invalid escape sequence"))?, + ), + Some(c) if c.is_digit(8) => output.push(unescape_octal(c, &mut chars)), + Some(c) if c == quote => output.push(quote), + _ => return Err(ParseError::new(token.span, "invalid escape sequence")), + }; + } else if c == quote { + return Err(ParseError::new(token.span, "intermediately close quote")); + } else { + output.push(c); + } + } + + Ok(output) +} + +fn unescape_unicode(chars: &mut Peekable>) -> Option { + let mut code = 0; + + for c in chars.take(4) { + code = code * 16 + c.to_digit(16)?; + } + + char::from_u32(code) +} + +fn unescape_byte(chars: &mut Peekable>) -> Option { + let mut byte = 0; + + for c in chars.take(2) { + byte = byte * 16 + c.to_digit(16)?; + } + + char::from_u32(byte) +} + +fn unescape_octal(c1: char, chars: &mut Peekable>) -> char { + let mut oct = c1.to_digit(8).unwrap(); + + while let Some(c) = chars.peek() { + if let Some(digit) = c.to_digit(8) { + oct = oct * 8 + digit; + chars.next(); + } else { + break; + } + } + + char::from_u32(oct).unwrap() +} From e73bc2584bea7263d5ef94ba69052c513be2063e Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 13:46:12 +0800 Subject: [PATCH 10/16] slice Signed-off-by: tison --- spath/src/parser/ast.rs | 4 ++-- spath/src/parser/parse.rs | 47 +++++++++++++++++++++++++++++++++++--- spath/src/parser/runner.rs | 2 +- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/spath/src/parser/ast.rs b/spath/src/parser/ast.rs index 2cedc0f..3eb6f97 100644 --- a/spath/src/parser/ast.rs +++ b/spath/src/parser/ast.rs @@ -50,10 +50,10 @@ pub enum Selector { /// §2.3.4 Array Slice Selector. Slice { /// The start index of the slice, inclusive. Default to 0. - start: Option, + start: i64, /// The end index of the slice, exclusive. Default to the length of the array. end: Option, /// The step to iterate the slice. Default to 1. - step: Option, + step: i64, }, } diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index 4021a3b..81efd4f 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -72,7 +72,10 @@ impl<'a> Parser<'a> { loop { let token = self.peek_token(); match token.kind { - TokenKind::EOI | TokenKind::RBracket => { + TokenKind::EOI => { + return Err(ParseError::new(token.span, "unclosed bracket")); + } + TokenKind::RBracket => { let _consumed = self.next_token(); return Ok(selectors); } @@ -105,14 +108,52 @@ impl<'a> Parser<'a> { Ok(Selector::Identifier { name }) } TokenKind::LiteralInteger => { - // TODO(tisonkun): dispatch slice-selector let index = parse_integer(token)?; - Ok(Selector::Index { index }) + if self.consume_token(TokenKind::Colon).is_some() { + self.parse_slice_selector(index) + } else { + Ok(Selector::Index { index }) + } } + TokenKind::Colon => self.parse_slice_selector(0), _ => Err(ParseError::unexpected_token(token.span)), } } + fn parse_slice_selector(&mut self, start: i64) -> Result { + let token = self.next_token(); + let mut end = None; + let mut step = 1; + match token.kind { + TokenKind::Colon => { + if let Some(token) = self.consume_token(TokenKind::LiteralInteger) { + // start::step + step = parse_integer(token)?; + } // else - start:: + } + TokenKind::LiteralInteger => { + end = Some(parse_integer(token)?); + if self.consume_token(TokenKind::Colon).is_some() { + if let Some(token) = self.consume_token(TokenKind::LiteralInteger) { + // start:end:step + step = parse_integer(token)?; + } // else - start:end: + } // else - start:end + } + _ => return Err(ParseError::unexpected_token(token.span)), + } + Ok(Selector::Slice { start, end, step }) + } + + fn consume_token(&mut self, kind: TokenKind) -> Option { + let token = self.peek_token(); + if token.kind == kind { + Some(self.next_token()) + } else { + None + } + } + fn peek_token(&self) -> Token { if self.index < self.tokens.len() { self.tokens[self.index] diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index 3aabeb7..deb65f6 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -29,6 +29,6 @@ pub fn run_parser(source: &str) -> Result { #[test] fn test_run_parser() { - let expr = run_parser(r#"$["name"][10]"#).unwrap(); + let expr = run_parser(r#"$[5::-2]"#).unwrap(); println!("{expr:?}"); } From 048276b66e81dde6b1d1e02da4f0f93cc3802e63 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 14:03:42 +0800 Subject: [PATCH 11/16] parse rest Signed-off-by: tison --- spath/src/lib.rs | 4 +++- spath/src/parser/mod.rs | 12 ++++++------ spath/src/parser/parse.rs | 21 ++++++++++++++++++++- spath/src/parser/runner.rs | 2 +- spath/src/spath.rs | 18 ++++++++++++++++++ 5 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 spath/src/spath.rs diff --git a/spath/src/lib.rs b/spath/src/lib.rs index e569f3c..378eb3a 100644 --- a/spath/src/lib.rs +++ b/spath/src/lib.rs @@ -18,9 +18,11 @@ pub use value::*; mod error; pub use error::*; +mod spath; +pub use spath::*; + #[cfg(feature = "json")] mod json; -#[allow(dead_code)] mod parser; #[cfg(test)] diff --git a/spath/src/parser/mod.rs b/spath/src/parser/mod.rs index 19c24d3..dca0563 100644 --- a/spath/src/parser/mod.rs +++ b/spath/src/parser/mod.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -mod ast; -mod error; -mod parse; -mod range; -mod runner; -mod token; +pub(crate) mod ast; +pub(crate) mod error; +pub(crate) mod parse; +pub(crate) mod range; +pub(crate) mod runner; +pub(crate) mod token; diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index 81efd4f..dfbf2ee 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -61,11 +61,30 @@ impl<'a> Parser<'a> { let selectors = self.parse_bracketed_selector()?; Ok(Some(Segment::Child { selectors })) } - // TODO(tisonkun): handle ..(descendant segment), .identifier, and .* + TokenKind::Dot => { + let token = self.next_token(); + match token.kind { + TokenKind::Asterisk => Ok(Some(Segment::Child { + selectors: vec![Selector::Wildcard], + })), + TokenKind::Ident => { + let name = token.text().to_string(); + Ok(Some(Segment::Descendant { + selectors: vec![Selector::Identifier { name }], + })) + } + TokenKind::Dot => self.parse_descendant_segment().map(Some), + _ => Err(ParseError::unexpected_token(token.span)), + } + } _ => Err(ParseError::unexpected_token(token.span)), } } + fn parse_descendant_segment(&mut self) -> Result { + todo!() + } + fn parse_bracketed_selector(&mut self) -> Result, ParseError> { let mut selectors = vec![]; let mut prev_comma = false; diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index deb65f6..956118b 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -29,6 +29,6 @@ pub fn run_parser(source: &str) -> Result { #[test] fn test_run_parser() { - let expr = run_parser(r#"$[5::-2]"#).unwrap(); + let expr = run_parser(r#"$[0,1,2,3,4]"#).unwrap(); println!("{expr:?}"); } diff --git a/spath/src/spath.rs b/spath/src/spath.rs new file mode 100644 index 0000000..3cdcedf --- /dev/null +++ b/spath/src/spath.rs @@ -0,0 +1,18 @@ +use crate::parser::ast::Expr; +use crate::parser::error::ParseError; +use crate::parser::runner::run_parser; +use std::str::FromStr; + +#[derive(Debug, Clone)] +pub struct SPath { + pub expr: Expr, +} + +impl FromStr for SPath { + type Err = ParseError; + + fn from_str(s: &str) -> Result { + let expr = run_parser(s)?; + Ok(SPath { expr }) + } +} From 026f57049fca8d02dfe683908c5a7ae11b5dded9 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 14:05:56 +0800 Subject: [PATCH 12/16] Descendant Signed-off-by: tison --- spath/src/parser/parse.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index dfbf2ee..37836e6 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -82,7 +82,23 @@ impl<'a> Parser<'a> { } fn parse_descendant_segment(&mut self) -> Result { - todo!() + let token = self.next_token(); + match token.kind { + TokenKind::LBracket => { + let selectors = self.parse_bracketed_selector()?; + Ok((Segment::Descendant { selectors })) + } + TokenKind::Asterisk => Ok((Segment::Descendant { + selectors: vec![Selector::Wildcard], + })), + TokenKind::Ident => { + let name = token.text().to_string(); + Ok((Segment::Descendant { + selectors: vec![Selector::Identifier { name }], + })) + } + _ => Err(ParseError::unexpected_token(token.span)), + } } fn parse_bracketed_selector(&mut self) -> Result, ParseError> { From 9632b921c880c08c8caebe948505b94693b45e68 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 14:07:27 +0800 Subject: [PATCH 13/16] Clippy Signed-off-by: tison --- spath/src/parser/{ast.rs => expr.rs} | 0 spath/src/parser/mod.rs | 2 +- spath/src/parser/parse.rs | 20 +++++++++++--------- spath/src/parser/runner.rs | 2 +- spath/src/spath.rs | 19 +++++++++++++++++-- 5 files changed, 30 insertions(+), 13 deletions(-) rename spath/src/parser/{ast.rs => expr.rs} (100%) diff --git a/spath/src/parser/ast.rs b/spath/src/parser/expr.rs similarity index 100% rename from spath/src/parser/ast.rs rename to spath/src/parser/expr.rs diff --git a/spath/src/parser/mod.rs b/spath/src/parser/mod.rs index dca0563..e00d305 100644 --- a/spath/src/parser/mod.rs +++ b/spath/src/parser/mod.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub(crate) mod ast; pub(crate) mod error; +pub(crate) mod expr; pub(crate) mod parse; pub(crate) mod range; pub(crate) mod runner; diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index 37836e6..a6d4525 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::parser::ast::Segment; -use crate::parser::ast::{Expr, Selector}; +use std::iter::Peekable; + use crate::parser::error::ParseError; +use crate::parser::expr::Expr; +use crate::parser::expr::Segment; +use crate::parser::expr::Selector; use crate::parser::token::Token; use crate::parser::token::TokenKind; -use std::iter::Peekable; #[derive(Debug)] pub struct Parser<'a> { @@ -86,16 +88,16 @@ impl<'a> Parser<'a> { match token.kind { TokenKind::LBracket => { let selectors = self.parse_bracketed_selector()?; - Ok((Segment::Descendant { selectors })) + Ok(Segment::Descendant { selectors }) } - TokenKind::Asterisk => Ok((Segment::Descendant { + TokenKind::Asterisk => Ok(Segment::Descendant { selectors: vec![Selector::Wildcard], - })), + }), TokenKind::Ident => { let name = token.text().to_string(); - Ok((Segment::Descendant { + Ok(Segment::Descendant { selectors: vec![Selector::Identifier { name }], - })) + }) } _ => Err(ParseError::unexpected_token(token.span)), } @@ -219,7 +221,7 @@ fn parse_string(token: Token) -> Result { let mut chars = text.chars(); let quote = chars.next().expect("quote char always exist"); - if chars.next_back().map_or(true, |ch| ch != quote) { + if chars.next_back() != Some(quote) { return Err(ParseError::new(token.span, "mismatched quote")); } diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index 956118b..dd508b7 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::parser::ast::Expr; use crate::parser::error::ParseError; +use crate::parser::expr::Expr; use crate::parser::parse::Parser; use crate::parser::token::Token; use crate::parser::token::Tokenizer; diff --git a/spath/src/spath.rs b/spath/src/spath.rs index 3cdcedf..7071fae 100644 --- a/spath/src/spath.rs +++ b/spath/src/spath.rs @@ -1,7 +1,22 @@ -use crate::parser::ast::Expr; +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::str::FromStr; + use crate::parser::error::ParseError; +use crate::parser::expr::Expr; use crate::parser::runner::run_parser; -use std::str::FromStr; #[derive(Debug, Clone)] pub struct SPath { From b02500d1569174638321bae1ed83e0d86411343e Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 14:25:36 +0800 Subject: [PATCH 14/16] binder Signed-off-by: tison --- spath/src/lib.rs | 4 +- spath/src/parser/{expr.rs => ast.rs} | 7 -- spath/src/parser/mod.rs | 4 +- spath/src/parser/parse.rs | 11 ++-- spath/src/parser/runner.rs | 12 +--- spath/src/parser/token.rs | 2 +- spath/src/spath.rs | 98 +++++++++++++++++++++++++--- 7 files changed, 102 insertions(+), 36 deletions(-) rename spath/src/parser/{expr.rs => ast.rs} (91%) diff --git a/spath/src/lib.rs b/spath/src/lib.rs index 378eb3a..cd1c062 100644 --- a/spath/src/lib.rs +++ b/spath/src/lib.rs @@ -21,9 +21,11 @@ pub use error::*; mod spath; pub use spath::*; +mod parser; +use parser::error::ParseError; + #[cfg(feature = "json")] mod json; -mod parser; #[cfg(test)] fn manifest_dir() -> std::path::PathBuf { diff --git a/spath/src/parser/expr.rs b/spath/src/parser/ast.rs similarity index 91% rename from spath/src/parser/expr.rs rename to spath/src/parser/ast.rs index 3eb6f97..e0d4e7c 100644 --- a/spath/src/parser/expr.rs +++ b/spath/src/parser/ast.rs @@ -12,13 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -/// A valid SPath expression. -#[derive(Debug, Clone)] -pub enum Expr { - /// Consists of a series of segments. - Segments { segments: Vec }, -} - #[derive(Debug, Clone)] pub enum Segment { /// §2.5.1 Child Segment. diff --git a/spath/src/parser/mod.rs b/spath/src/parser/mod.rs index e00d305..b544b4c 100644 --- a/spath/src/parser/mod.rs +++ b/spath/src/parser/mod.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -pub(crate) mod error; -pub(crate) mod expr; +pub(crate) mod ast; +pub mod error; pub(crate) mod parse; pub(crate) mod range; pub(crate) mod runner; diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index a6d4525..7761358 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -14,12 +14,11 @@ use std::iter::Peekable; -use crate::parser::error::ParseError; -use crate::parser::expr::Expr; -use crate::parser::expr::Segment; -use crate::parser::expr::Selector; +use crate::parser::ast::Segment; +use crate::parser::ast::Selector; use crate::parser::token::Token; use crate::parser::token::TokenKind; +use crate::ParseError; #[derive(Debug)] pub struct Parser<'a> { @@ -38,7 +37,7 @@ impl<'a> Parser<'a> { } } - pub fn parse(&mut self) -> Result { + pub fn parse(&mut self) -> Result, ParseError> { let first = self.next_token(); // §2.2.1 (Root Identifier) Syntax @@ -52,7 +51,7 @@ impl<'a> Parser<'a> { while let Some(segment) = self.parse_segment()? { segments.push(segment); } - Ok(Expr::Segments { segments }) + Ok(segments) } fn parse_segment(&mut self) -> Result, ParseError> { diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index dd508b7..05d23af 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -12,23 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::parser::error::ParseError; -use crate::parser::expr::Expr; +use crate::parser::ast::Segment; use crate::parser::parse::Parser; use crate::parser::token::Token; use crate::parser::token::Tokenizer; +use crate::ParseError; pub fn run_tokenizer(source: &str) -> Result, ParseError> { Tokenizer::new(source).collect::>() } -pub fn run_parser(source: &str) -> Result { +pub fn run_parser(source: &str) -> Result, ParseError> { let tokens = run_tokenizer(source)?; Parser::new(source, tokens).parse() } - -#[test] -fn test_run_parser() { - let expr = run_parser(r#"$[0,1,2,3,4]"#).unwrap(); - println!("{expr:?}"); -} diff --git a/spath/src/parser/token.rs b/spath/src/parser/token.rs index 47affae..3673acc 100644 --- a/spath/src/parser/token.rs +++ b/spath/src/parser/token.rs @@ -17,7 +17,7 @@ use std::fmt; use logos::Lexer; use logos::Logos; -use crate::parser::error::ParseError; +use crate::ParseError; use crate::parser::range::Range; #[derive(Clone, Copy, PartialEq, Eq)] diff --git a/spath/src/spath.rs b/spath/src/spath.rs index 7071fae..2b6fc52 100644 --- a/spath/src/spath.rs +++ b/spath/src/spath.rs @@ -12,22 +12,100 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::str::FromStr; - -use crate::parser::error::ParseError; -use crate::parser::expr::Expr; +use crate::parser::ast::{Segment, Selector}; use crate::parser::runner::run_parser; +use crate::ParseError; +use crate::Value; #[derive(Debug, Clone)] pub struct SPath { - pub expr: Expr, + segments: Vec, } -impl FromStr for SPath { - type Err = ParseError; +impl SPath { + pub fn eval(&self, value: Value) -> Option { + None + } +} + +#[derive(Debug, Clone)] +enum EvalSegment { + /// §2.5.1 Child Segment. + Child { + /// The selectors of the child segment. + selectors: Vec, + }, + /// §2.5.2 Descendant Segment. + Descendant { + /// The selectors of the descendant segment. + selectors: Vec, + }, +} + +#[derive(Debug, Clone)] +enum EvalSelector { + /// §2.3.2 Wildcard Selector. + Wildcard, + /// §2.3.1 Name Selector. + Identifier { + /// The name of the selector. + name: String, + }, + /// §2.3.3 Index Selector. + Index { + /// The index of the selector. + index: i64, + }, + /// §2.3.4 Array Slice Selector. + Slice { + /// The start index of the slice, inclusive. Default to 0. + start: i64, + /// The end index of the slice, exclusive. Default to the length of the array. + end: Option, + /// The step to iterate the slice. Default to 1. + step: i64, + }, +} + +#[derive(Debug)] +pub struct Binder { + segments: Vec, +} + +impl Binder { + /// Create a new binder by parsing the spath expression. + pub fn parse(source: &str) -> Result { + let segments = run_parser(source)?; + Ok(Self { segments }) + } + + /// Bind the segments to the eval context. + pub fn bind(&self) -> SPath { + let segments = self.segments.iter().map(|s| self.bind_segment(s)).collect(); + SPath { segments } + } + + fn bind_segment(&self, segment: &Segment) -> EvalSegment { + match segment { + Segment::Child { selectors } => EvalSegment::Child { + selectors: selectors.iter().map(|s| self.bind_selector(s)).collect(), + }, + Segment::Descendant { selectors } => EvalSegment::Descendant { + selectors: selectors.iter().map(|s| self.bind_selector(s)).collect(), + }, + } + } - fn from_str(s: &str) -> Result { - let expr = run_parser(s)?; - Ok(SPath { expr }) + fn bind_selector(&self, selector: &Selector) -> EvalSelector { + match selector { + Selector::Wildcard => EvalSelector::Wildcard, + Selector::Identifier { name } => EvalSelector::Identifier { name: name.clone() }, + Selector::Index { index } => EvalSelector::Index { index: *index }, + Selector::Slice { start, end, step } => EvalSelector::Slice { + start: *start, + end: *end, + step: *step, + }, + } } } From 6dbac7e3952294c12e7de70e8673ee9442ba845b Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 14:49:31 +0800 Subject: [PATCH 15/16] fixu Signed-off-by: tison --- spath/src/error.rs | 4 +++ spath/src/lib.rs | 4 +-- spath/src/parser/mod.rs | 2 +- spath/src/parser/parse.rs | 2 +- spath/src/parser/runner.rs | 2 +- spath/src/parser/token.rs | 2 +- spath/src/spath.rs | 57 +++++++++++++++++++++----------------- 7 files changed, 40 insertions(+), 33 deletions(-) diff --git a/spath/src/error.rs b/spath/src/error.rs index 220d68f..191a5ed 100644 --- a/spath/src/error.rs +++ b/spath/src/error.rs @@ -12,6 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#[derive(Debug, thiserror::Error)] +#[error("{0}")] +pub struct BindError(pub String); + #[derive(Debug, thiserror::Error)] #[error("{0}")] pub struct EvalError(pub String); diff --git a/spath/src/lib.rs b/spath/src/lib.rs index cd1c062..378eb3a 100644 --- a/spath/src/lib.rs +++ b/spath/src/lib.rs @@ -21,11 +21,9 @@ pub use error::*; mod spath; pub use spath::*; -mod parser; -use parser::error::ParseError; - #[cfg(feature = "json")] mod json; +mod parser; #[cfg(test)] fn manifest_dir() -> std::path::PathBuf { diff --git a/spath/src/parser/mod.rs b/spath/src/parser/mod.rs index b544b4c..dca0563 100644 --- a/spath/src/parser/mod.rs +++ b/spath/src/parser/mod.rs @@ -13,7 +13,7 @@ // limitations under the License. pub(crate) mod ast; -pub mod error; +pub(crate) mod error; pub(crate) mod parse; pub(crate) mod range; pub(crate) mod runner; diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index 7761358..01537c9 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -18,7 +18,7 @@ use crate::parser::ast::Segment; use crate::parser::ast::Selector; use crate::parser::token::Token; use crate::parser::token::TokenKind; -use crate::ParseError; +use crate::parser::error::ParseError; #[derive(Debug)] pub struct Parser<'a> { diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index 05d23af..cae78ae 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -16,7 +16,7 @@ use crate::parser::ast::Segment; use crate::parser::parse::Parser; use crate::parser::token::Token; use crate::parser::token::Tokenizer; -use crate::ParseError; +use crate::parser::error::ParseError; pub fn run_tokenizer(source: &str) -> Result, ParseError> { Tokenizer::new(source).collect::>() diff --git a/spath/src/parser/token.rs b/spath/src/parser/token.rs index 3673acc..47affae 100644 --- a/spath/src/parser/token.rs +++ b/spath/src/parser/token.rs @@ -17,7 +17,7 @@ use std::fmt; use logos::Lexer; use logos::Logos; -use crate::ParseError; +use crate::parser::error::ParseError; use crate::parser::range::Range; #[derive(Clone, Copy, PartialEq, Eq)] diff --git a/spath/src/spath.rs b/spath/src/spath.rs index 2b6fc52..d27ecd7 100644 --- a/spath/src/spath.rs +++ b/spath/src/spath.rs @@ -14,8 +14,7 @@ use crate::parser::ast::{Segment, Selector}; use crate::parser::runner::run_parser; -use crate::ParseError; -use crate::Value; +use crate::{BindError, Value}; #[derive(Debug, Clone)] pub struct SPath { @@ -23,8 +22,21 @@ pub struct SPath { } impl SPath { + pub fn new(source: &str) -> Result { + let segments = run_parser(source).map_err(|err| BindError(format!("{err}")))?; + let binder = Binder {}; + Ok(binder.bind(segments)) + } + pub fn eval(&self, value: Value) -> Option { - None + let mut root = value; + for segment in self.segments { + if let Some(result) = self.eval_segment(segment, result, result) { + result + } else { + return None; + } + } } } @@ -67,45 +79,38 @@ enum EvalSelector { }, } -#[derive(Debug)] -pub struct Binder { - segments: Vec, -} +#[derive(Debug, Clone)] +pub struct Binder {} impl Binder { - /// Create a new binder by parsing the spath expression. - pub fn parse(source: &str) -> Result { - let segments = run_parser(source)?; - Ok(Self { segments }) - } - - /// Bind the segments to the eval context. - pub fn bind(&self) -> SPath { - let segments = self.segments.iter().map(|s| self.bind_segment(s)).collect(); + fn bind(&self, segments: Vec) -> SPath { + let segments = segments.into_iter().map(|s| self.bind_segment(s)).collect(); SPath { segments } } - fn bind_segment(&self, segment: &Segment) -> EvalSegment { + fn bind_segment(&self, segment: Segment) -> EvalSegment { match segment { Segment::Child { selectors } => EvalSegment::Child { - selectors: selectors.iter().map(|s| self.bind_selector(s)).collect(), + selectors: selectors + .into_iter() + .map(|s| self.bind_selector(s)) + .collect(), }, Segment::Descendant { selectors } => EvalSegment::Descendant { - selectors: selectors.iter().map(|s| self.bind_selector(s)).collect(), + selectors: selectors + .into_iter() + .map(|s| self.bind_selector(s)) + .collect(), }, } } - fn bind_selector(&self, selector: &Selector) -> EvalSelector { + fn bind_selector(&self, selector: Selector) -> EvalSelector { match selector { Selector::Wildcard => EvalSelector::Wildcard, - Selector::Identifier { name } => EvalSelector::Identifier { name: name.clone() }, + Selector::Identifier { name } => EvalSelector::Identifier { name }, Selector::Index { index } => EvalSelector::Index { index: *index }, - Selector::Slice { start, end, step } => EvalSelector::Slice { - start: *start, - end: *end, - step: *step, - }, + Selector::Slice { start, end, step } => EvalSelector::Slice { start, end, step }, } } } From 22bb3b854cf63cf74ad0b2948af4342211fa3a85 Mon Sep 17 00:00:00 2001 From: tison Date: Mon, 6 Jan 2025 15:19:39 +0800 Subject: [PATCH 16/16] simplest case Signed-off-by: tison --- spath/src/json/tests.rs | 8 ++--- spath/src/lib.rs | 10 ++++++ spath/src/parser/parse.rs | 4 +-- spath/src/parser/runner.rs | 2 +- spath/src/spath.rs | 63 +++++++++++++++++++++++++++++++++----- spath/src/tests.rs | 52 +++++++++++++++++++++++++++++++ 6 files changed, 122 insertions(+), 17 deletions(-) create mode 100644 spath/src/tests.rs diff --git a/spath/src/json/tests.rs b/spath/src/json/tests.rs index 69908fb..c47d6da 100644 --- a/spath/src/json/tests.rs +++ b/spath/src/json/tests.rs @@ -17,17 +17,13 @@ use googletest::matchers::eq; use insta::assert_snapshot; use serde_json::Value as JsonValue; -use crate::manifest_dir; +use crate::json_testdata; use crate::Value; fn assert_testdata_identical(path: &str) -> String { - let path = manifest_dir().join("testdata").join(path); - let literal = std::fs::read_to_string(&path).unwrap(); - - let json_value = serde_json::from_str::(&literal).unwrap(); + let json_value = json_testdata(path); let value = Value::from(json_value.clone()); assert_that!(json_value, eq(&JsonValue::from(value.clone()))); - format!("{:?}", value) } diff --git a/spath/src/lib.rs b/spath/src/lib.rs index 378eb3a..4127f44 100644 --- a/spath/src/lib.rs +++ b/spath/src/lib.rs @@ -25,8 +25,18 @@ pub use spath::*; mod json; mod parser; +#[cfg(test)] +mod tests; + #[cfg(test)] fn manifest_dir() -> std::path::PathBuf { let dir = env!("CARGO_MANIFEST_DIR"); std::path::PathBuf::from(dir).canonicalize().unwrap() } + +#[cfg(test)] +fn json_testdata(filename: &str) -> serde_json::Value { + let path = manifest_dir().join("testdata").join(filename); + let content = std::fs::read_to_string(path).unwrap(); + serde_json::from_str(&content).unwrap() +} diff --git a/spath/src/parser/parse.rs b/spath/src/parser/parse.rs index 01537c9..e38a120 100644 --- a/spath/src/parser/parse.rs +++ b/spath/src/parser/parse.rs @@ -16,9 +16,9 @@ use std::iter::Peekable; use crate::parser::ast::Segment; use crate::parser::ast::Selector; +use crate::parser::error::ParseError; use crate::parser::token::Token; use crate::parser::token::TokenKind; -use crate::parser::error::ParseError; #[derive(Debug)] pub struct Parser<'a> { @@ -70,7 +70,7 @@ impl<'a> Parser<'a> { })), TokenKind::Ident => { let name = token.text().to_string(); - Ok(Some(Segment::Descendant { + Ok(Some(Segment::Child { selectors: vec![Selector::Identifier { name }], })) } diff --git a/spath/src/parser/runner.rs b/spath/src/parser/runner.rs index cae78ae..423609b 100644 --- a/spath/src/parser/runner.rs +++ b/spath/src/parser/runner.rs @@ -13,10 +13,10 @@ // limitations under the License. use crate::parser::ast::Segment; +use crate::parser::error::ParseError; use crate::parser::parse::Parser; use crate::parser::token::Token; use crate::parser::token::Tokenizer; -use crate::parser::error::ParseError; pub fn run_tokenizer(source: &str) -> Result, ParseError> { Tokenizer::new(source).collect::>() diff --git a/spath/src/spath.rs b/spath/src/spath.rs index d27ecd7..eb14390 100644 --- a/spath/src/spath.rs +++ b/spath/src/spath.rs @@ -12,9 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::parser::ast::{Segment, Selector}; +use crate::parser::ast::Segment; +use crate::parser::ast::Selector; use crate::parser::runner::run_parser; -use crate::{BindError, Value}; +use crate::BindError; +use crate::Value; #[derive(Debug, Clone)] pub struct SPath { @@ -28,15 +30,16 @@ impl SPath { Ok(binder.bind(segments)) } - pub fn eval(&self, value: Value) -> Option { - let mut root = value; - for segment in self.segments { - if let Some(result) = self.eval_segment(segment, result, result) { - result + pub fn eval(&self, root: &Value) -> Option { + let mut result = root.clone(); + for segment in &self.segments { + if let Some(res) = segment.eval(root, result) { + result = res; } else { return None; } } + Some(result.clone()) } } @@ -54,6 +57,29 @@ enum EvalSegment { }, } +impl EvalSegment { + fn eval(&self, root: &Value, value: Value) -> Option { + match self { + EvalSegment::Child { selectors } => { + let mut result = value; + for selector in selectors { + if let Some(res) = selector.eval(root, result) { + result = res; + } else { + return None; + } + } + Some(result) + } + EvalSegment::Descendant { + selectors: _selectors, + } => { + todo!("descendant segment") + } + } + } +} + #[derive(Debug, Clone)] enum EvalSelector { /// §2.3.2 Wildcard Selector. @@ -79,6 +105,27 @@ enum EvalSelector { }, } +impl EvalSelector { + fn eval(&self, _root: &Value, value: Value) -> Option { + match self { + EvalSelector::Wildcard => Some(value), + EvalSelector::Identifier { name } => { + if let Value::Object(map) = value { + map.get(name).cloned() + } else { + None + } + } + EvalSelector::Index { index } => { + todo!("index selector: {index}") + } + EvalSelector::Slice { start, end, step } => { + todo!("slice selector: {start}, {end:?}, {step}") + } + } + } +} + #[derive(Debug, Clone)] pub struct Binder {} @@ -109,7 +156,7 @@ impl Binder { match selector { Selector::Wildcard => EvalSelector::Wildcard, Selector::Identifier { name } => EvalSelector::Identifier { name }, - Selector::Index { index } => EvalSelector::Index { index: *index }, + Selector::Index { index } => EvalSelector::Index { index }, Selector::Slice { start, end, step } => EvalSelector::Slice { start, end, step }, } } diff --git a/spath/src/tests.rs b/spath/src/tests.rs new file mode 100644 index 0000000..1ba5bf5 --- /dev/null +++ b/spath/src/tests.rs @@ -0,0 +1,52 @@ +// Copyright 2024 tison +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use googletest::assert_that; +use googletest::prelude::eq; +use insta::assert_debug_snapshot; + +use crate::json_testdata; +use crate::SPath; +use crate::Value; + +fn eval_spath(spath: &str, value: &Value) -> Option { + let spath = SPath::new(spath).unwrap(); + spath.eval(value) +} + +#[test] +fn test_root_wildcard() { + let value = json_testdata("rfc-9535-example-1.json"); + let value = Value::from(value); + + let result = eval_spath("$", &value).unwrap(); + assert_that!(result, eq(&value)); + let result = eval_spath("$.*", &value).unwrap(); + assert_that!(result, eq(&value)); + let result = eval_spath("$[*]", &value).unwrap(); + assert_that!(result, eq(&value)); +} + +#[test] +fn test_basic_name_selector() { + let value = json_testdata("rfc-9535-example-1.json"); + let value = Value::from(value); + + let result = eval_spath(r#"$["store"]['bicycle']"#, &value).unwrap(); + assert_debug_snapshot!(result, @r#"{"color":'red',"price":399}"#); + let result = eval_spath(r#"$.store.bicycle.color"#, &value).unwrap(); + assert_debug_snapshot!(result, @"'red'"); + let result = eval_spath(r#"$.store.book.*"#, &value).unwrap(); + assert_debug_snapshot!(result, @r#"[{"author":'Nigel Rees',"category":'reference',"price":8.95,"title":'Sayings of the Century'},{"author":'Evelyn Waugh',"category":'fiction',"price":12.99,"title":'Sword of Honour'},{"author":'Herman Melville',"category":'fiction',"isbn":'0-553-21311-3',"price":8.99,"title":'Moby Dick'},{"author":'J. R. R. Tolkien',"category":'fiction',"isbn":'0-395-19395-8',"price":22.99,"title":'The Lord of the Rings'}]"#); +}