From a0443a9dae32c69edc98f369a58e04142d5d1fd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Boudereau?= Date: Fri, 15 Sep 2023 18:25:54 +0200 Subject: [PATCH] rust: add a new deserializer version (#3) --- .github/workflows/bench.yml | 8 +- rust/serdejsonbench/examples/bench.rs | 106 ++++++++++++------ .../serde_json_stream_deserializer.rs | 42 ++++--- .../stream_deserializer_api_example.rs | 22 ++-- rust/serdejsonbench/src/lib.rs | 22 ++-- .../src/serde_json_stream_deserializer2.rs | 59 ++++++++++ 6 files changed, 177 insertions(+), 82 deletions(-) create mode 100644 rust/serdejsonbench/src/serde_json_stream_deserializer2.rs diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index e294528..6d2750f 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -5,9 +5,6 @@ on: branches: [ "main" ] pull_request: branches: [ "*" ] - paths: - - '.github/**-java.yml' - - 'java/**' jobs: bench: @@ -18,7 +15,10 @@ jobs: - name: setup-files run: ./uncompress.sh - - name: rust/benchmark + - name: rust/benchmark/v2 + run: cd rust/serdejsonbench && cargo build -r --examples && ./target/release/examples/bench --console --times 10 --method v2 + + - name: rust/benchmark/v1 run: cd rust/serdejsonbench && cargo build -r --examples && ./target/release/examples/bench --console --times 10 - uses: actions/setup-dotnet@v3 diff --git a/rust/serdejsonbench/examples/bench.rs b/rust/serdejsonbench/examples/bench.rs index 846ab5e..2bbbf17 100644 --- a/rust/serdejsonbench/examples/bench.rs +++ b/rust/serdejsonbench/examples/bench.rs @@ -1,36 +1,70 @@ -use serdejsonbench::JsonIterator; -use std::{io::Write, time::Instant}; - -fn parse() { - let iter = JsonIterator::new(r#"../../json/256MB.json"#.into()); - let mut count = 0; - for json in iter { - let json = json.unwrap(); - assert_eq!("FULL", json.delta_mode); - count = count + 1; - } - assert_eq!(68495, count); -} - -fn main() { - let args: Vec = std::env::args().skip(1).collect(); - - if args.len() == 3 && args[0] == "--console" && args[1] == "--times" { - let now = Instant::now(); - let times: u32 = args[2].as_str().parse().unwrap(); - - for _ in 0..times { - print!("."); - std::io::stdout().flush().unwrap(); - parse(); - } - - let elapsed = now.elapsed().as_millis(); - let avg = elapsed / u128::from(times); - println!("done in {elapsed}ms avg {avg}ms"); - } else { - panic!("bad arguments {args:?}") - } - - println!("done"); -} +use serdejsonbench::JsonIterator; +use serdejsonbench::{iter_json_array, Json}; +use std::fs::File; +use std::{io::Write, time::Instant}; + +use std::io::BufReader; + +fn parsev2() { + let reader = BufReader::with_capacity(8192, File::open(r#"../../json/256MB.json"#).unwrap()); + let iter = iter_json_array(reader); + let mut count = 0; + for json in iter { + let json: Json = json.unwrap(); + assert_eq!("FULL", json.delta_mode); + count = count + 1; + } + assert_eq!(68495, count); +} + +fn parse() { + let iter = JsonIterator::new(r#"../../json/256MB.json"#.into()); + let mut count = 0; + for json in iter { + let json = json.unwrap(); + assert_eq!("FULL", json.delta_mode); + count = count + 1; + } + assert_eq!(68495, count); +} + +enum Method { + V1, + V2, +} + +fn main() { + let args: Vec = std::env::args().skip(1).collect(); + + if args.len() > 2 && args[0] == "--console" && args[1] == "--times" { + let now = Instant::now(); + let times: u32 = args[2].as_str().parse().unwrap(); + + let method = if args.len() > 4 && args[3] == "--method" && args[4] == "v2" { + println!("v2 version"); + Method::V2 + } else { + println!("v1 version"); + Method::V1 + }; + + let parse = match method { + Method::V1 => parse, + Method::V2 => parsev2, + }; + + for _ in 0..times { + print!("."); + std::io::stdout().flush().unwrap(); + parse(); + } + + let elapsed = now.elapsed().as_millis(); + let avg = elapsed / u128::from(times); + println!("done in {elapsed}ms avg {avg}ms"); + } else { + panic!("bad arguments {args:?}") + } + + println!("done"); +} diff --git a/rust/serdejsonbench/examples/serde_json_stream_deserializer.rs b/rust/serdejsonbench/examples/serde_json_stream_deserializer.rs index 79118d9..07ab6cd 100644 --- a/rust/serdejsonbench/examples/serde_json_stream_deserializer.rs +++ b/rust/serdejsonbench/examples/serde_json_stream_deserializer.rs @@ -1,22 +1,20 @@ -//https://github.com/serde-rs/json/issues/404#issuecomment-674293399 - -use serdejsonbench::JsonIterator; - -fn main() -> std::io::Result<()> { - let iter = JsonIterator::new( - r#"../../json/256MB.json"#.into(), - ); - let mut count = 0; - for json in iter { - let json = json.unwrap(); - assert_eq!("FULL", json.delta_mode); - count = count + 1; - if count % 1000 == 0 { - println!("{count}"); - } - } - - assert_eq!(68495, count); - println!("done"); - Ok(()) -} +//https://github.com/serde-rs/json/issues/404#issuecomment-674293399 + +use serdejsonbench::JsonIterator; + +fn main() -> std::io::Result<()> { + let iter = JsonIterator::new(r#"../../json/256MB.json"#.into()); + let mut count = 0; + for json in iter { + let json = json.unwrap(); + assert_eq!("FULL", json.delta_mode); + count = count + 1; + if count % 1000 == 0 { + println!("{count}"); + } + } + + assert_eq!(68495, count); + println!("done"); + Ok(()) +} diff --git a/rust/serdejsonbench/examples/stream_deserializer_api_example.rs b/rust/serdejsonbench/examples/stream_deserializer_api_example.rs index 7fc8839..cc7e240 100644 --- a/rust/serdejsonbench/examples/stream_deserializer_api_example.rs +++ b/rust/serdejsonbench/examples/stream_deserializer_api_example.rs @@ -1,11 +1,11 @@ -use serde_json::{Deserializer, Value}; - -fn main() { - let data = "{\"k\": 3}1\"cool\"\"stuff\" 3{} [0, 1, 2]"; - - let stream = Deserializer::from_str(data).into_iter::(); - - for value in stream { - println!("{}", value.unwrap()); - } -} \ No newline at end of file +use serde_json::{Deserializer, Value}; + +fn main() { + let data = "{\"k\": 3}1\"cool\"\"stuff\" 3{} [0, 1, 2]"; + + let stream = Deserializer::from_str(data).into_iter::(); + + for value in stream { + println!("{}", value.unwrap()); + } +} diff --git a/rust/serdejsonbench/src/lib.rs b/rust/serdejsonbench/src/lib.rs index 06c24e2..33b9300 100644 --- a/rust/serdejsonbench/src/lib.rs +++ b/rust/serdejsonbench/src/lib.rs @@ -1,9 +1,13 @@ -//https://github.com/serde-rs/json/issues/404#issuecomment-674293399 -mod types; -pub use types::Json; - -mod serde_json_stream_deserializer; -pub use serde_json_stream_deserializer::JsonIterator; - -mod serde_json_simple_deserializer; -pub use serde_json_simple_deserializer::{read_from_file, readone_from_file}; \ No newline at end of file +//https://github.com/serde-rs/json/issues/404#issuecomment-674293399 +mod types; +pub use types::Json; + +mod serde_json_stream_deserializer; +pub use serde_json_stream_deserializer::JsonIterator; + +mod serde_json_simple_deserializer; +pub use serde_json_simple_deserializer::{read_from_file, readone_from_file}; + +mod serde_json_stream_deserializer2; + +pub use serde_json_stream_deserializer2::iter_json_array; diff --git a/rust/serdejsonbench/src/serde_json_stream_deserializer2.rs b/rust/serdejsonbench/src/serde_json_stream_deserializer2.rs new file mode 100644 index 0000000..ba997b8 --- /dev/null +++ b/rust/serdejsonbench/src/serde_json_stream_deserializer2.rs @@ -0,0 +1,59 @@ +// https://github.com/serde-rs/json/issues/404#issuecomment-892957228 +use serde::de::DeserializeOwned; +use serde_json::{self, Deserializer}; +use std::io::{self, Read}; + +fn read_skipping_ws(mut reader: impl Read) -> io::Result { + loop { + let mut byte = 0u8; + reader.read_exact(std::slice::from_mut(&mut byte))?; + if !byte.is_ascii_whitespace() { + return Ok(byte); + } + } +} + +fn invalid_data(msg: &str) -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, msg) +} + +fn deserialize_single(reader: R) -> io::Result { + let next_obj = Deserializer::from_reader(reader).into_iter::().next(); + match next_obj { + Some(result) => result.map_err(Into::into), + None => Err(invalid_data("premature EOF")), + } +} + +fn yield_next_obj( + mut reader: R, + at_start: &mut bool, +) -> io::Result> { + if !*at_start { + *at_start = true; + if read_skipping_ws(&mut reader)? == b'[' { + // read the next char to see if the array is empty + let peek = read_skipping_ws(&mut reader)?; + if peek == b']' { + Ok(None) + } else { + deserialize_single(io::Cursor::new([peek]).chain(reader)).map(Some) + } + } else { + Err(invalid_data("`[` not found")) + } + } else { + match read_skipping_ws(&mut reader)? { + b',' => deserialize_single(reader).map(Some), + b']' => Ok(None), + _ => Err(invalid_data("`,` or `]` not found")), + } + } +} + +pub fn iter_json_array( + mut reader: R, +) -> impl Iterator> { + let mut at_start = false; + std::iter::from_fn(move || yield_next_obj(&mut reader, &mut at_start).transpose()) +}