Skip to content

Commit

Permalink
rust: add a new deserializer version (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
cboudereau committed Sep 15, 2023
1 parent a847c1e commit a0443a9
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 82 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ on:
branches: [ "main" ]
pull_request:
branches: [ "*" ]
paths:
- '.github/**-java.yml'
- 'java/**'

jobs:
bench:
Expand All @@ -18,7 +15,10 @@ jobs:
- name: setup-files
run: ./uncompress.sh

- name: rust/benchmark
- name: rust/benchmark/v2
run: cd rust/serdejsonbench && cargo build -r --examples && ./target/release/examples/bench --console --times 10 --method v2

- name: rust/benchmark/v1
run: cd rust/serdejsonbench && cargo build -r --examples && ./target/release/examples/bench --console --times 10

- uses: actions/setup-dotnet@v3
Expand Down
106 changes: 70 additions & 36 deletions rust/serdejsonbench/examples/bench.rs
Original file line number Diff line number Diff line change
@@ -1,36 +1,70 @@
use serdejsonbench::JsonIterator;
use std::{io::Write, time::Instant};

/// Runs one full pass over `../../json/256MB.json` using `JsonIterator`.
///
/// Every yielded item is unwrapped and its `delta_mode` is checked to be
/// `"FULL"`; the pass must produce exactly 68495 items.
///
/// # Panics
///
/// Panics if any item is an error, if a `delta_mode` differs from `"FULL"`,
/// or if the number of items is not 68495.
fn parse() {
    let mut seen = 0;
    for record in JsonIterator::new(r#"../../json/256MB.json"#.into()) {
        let record = record.unwrap();
        assert_eq!("FULL", record.delta_mode);
        seen += 1;
    }
    assert_eq!(68495, seen);
}

/// Benchmark entry point.
///
/// Expects exactly `--console --times <N>` and runs `parse` `N` times,
/// printing a progress dot per run followed by the total and average
/// wall-clock time in milliseconds.
///
/// # Panics
///
/// Panics when the arguments do not match the expected shape, when `<N>` is
/// not a valid `u32`, or when `<N>` is zero (an average over zero runs is
/// undefined and previously surfaced as an opaque divide-by-zero panic).
fn main() {
    let args: Vec<String> = std::env::args().skip(1).collect();

    if args.len() == 3 && args[0] == "--console" && args[1] == "--times" {
        let times: u32 = args[2]
            .parse()
            .unwrap_or_else(|e| panic!("bad --times value {:?}: {e}", args[2]));
        // Reject zero up front instead of dividing by it after the loop.
        assert!(times > 0, "--times must be at least 1");

        // Start the clock only once argument handling is done.
        let now = Instant::now();

        for _ in 0..times {
            print!(".");
            std::io::stdout().flush().unwrap();
            parse();
        }

        let elapsed = now.elapsed().as_millis();
        let avg = elapsed / u128::from(times);
        println!("done in {elapsed}ms avg {avg}ms");
    } else {
        panic!("bad arguments {args:?}")
    }

    println!("done");
}
use serdejsonbench::JsonIterator;
use serdejsonbench::{iter_json_array, Json};
use std::fs::File;
use std::{io::Write, time::Instant};

use std::io::BufReader;

/// Runs one full pass over `../../json/256MB.json` using `iter_json_array`.
///
/// The file is read through a `BufReader` with an 8192-byte buffer. Every
/// yielded item is unwrapped as a `Json` and its `delta_mode` is checked to be
/// `"FULL"`; the pass must produce exactly 68495 items.
///
/// # Panics
///
/// Panics if the file cannot be opened, if any item is an error, if a
/// `delta_mode` differs from `"FULL"`, or if the item count is not 68495.
fn parsev2() {
    let file = File::open(r#"../../json/256MB.json"#).unwrap();
    let buffered = BufReader::with_capacity(8192, file);

    let mut seen = 0;
    for record in iter_json_array(buffered) {
        let record: Json = record.unwrap();
        assert_eq!("FULL", record.delta_mode);
        seen += 1;
    }
    assert_eq!(68495, seen);
}

/// Runs one full pass over `../../json/256MB.json` using `JsonIterator`.
///
/// Every yielded item is unwrapped and its `delta_mode` is checked to be
/// `"FULL"`; the pass must produce exactly 68495 items.
///
/// # Panics
///
/// Panics if any item is an error, if a `delta_mode` differs from `"FULL"`,
/// or if the number of items is not 68495.
fn parse() {
    let mut seen = 0;
    for record in JsonIterator::new(r#"../../json/256MB.json"#.into()) {
        let record = record.unwrap();
        assert_eq!("FULL", record.delta_mode);
        seen += 1;
    }
    assert_eq!(68495, seen);
}

/// Which deserializer implementation the benchmark should exercise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Method {
    /// The `JsonIterator`-based implementation (run by `parse`).
    V1,
    /// The `iter_json_array`-based implementation (run by `parsev2`).
    V2,
}

/// Benchmark entry point.
///
/// Expects `--console --times <N>` optionally followed by `--method v2`.
/// Runs the selected deserializer `N` times, printing a progress dot per run
/// followed by the total and average wall-clock time in milliseconds.
///
/// Any `--method` value other than `v2` (or no `--method` at all) selects the
/// v1 implementation, and extra trailing arguments are ignored.
///
/// # Panics
///
/// Panics when the leading arguments do not match the expected shape, when
/// `<N>` is not a valid `u32`, or when `<N>` is zero (an average over zero
/// runs is undefined and previously surfaced as an opaque divide-by-zero
/// panic).
fn main() {
    let args: Vec<String> = std::env::args().skip(1).collect();

    if args.len() > 2 && args[0] == "--console" && args[1] == "--times" {
        let times: u32 = args[2]
            .parse()
            .unwrap_or_else(|e| panic!("bad --times value {:?}: {e}", args[2]));
        // Reject zero up front instead of dividing by it after the loop.
        assert!(times > 0, "--times must be at least 1");

        let method = if args.len() > 4 && args[3] == "--method" && args[4] == "v2" {
            println!("v2 version");
            Method::V2
        } else {
            println!("v1 version");
            Method::V1
        };

        // Named `run` so the function `parse` is not shadowed by a local.
        let run: fn() = match method {
            Method::V1 => parse,
            Method::V2 => parsev2,
        };

        // Start the clock only once argument handling and the banner are done.
        let now = Instant::now();

        for _ in 0..times {
            print!(".");
            std::io::stdout().flush().unwrap();
            run();
        }

        let elapsed = now.elapsed().as_millis();
        let avg = elapsed / u128::from(times);
        println!("done in {elapsed}ms avg {avg}ms");
    } else {
        panic!("bad arguments {args:?}")
    }

    println!("done");
}
42 changes: 20 additions & 22 deletions rust/serdejsonbench/examples/serde_json_stream_deserializer.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399

use serdejsonbench::JsonIterator;

/// Streams `../../json/256MB.json` through `JsonIterator`, printing the
/// running item count every 1000 items and `done` at the end.
///
/// # Panics
///
/// Panics if any item is an error, if an item's `delta_mode` is not `"FULL"`,
/// or if the total number of items is not 68495.
fn main() -> std::io::Result<()> {
    let mut seen = 0;
    for record in JsonIterator::new(r#"../../json/256MB.json"#.into()) {
        let record = record.unwrap();
        assert_eq!("FULL", record.delta_mode);
        seen += 1;
        // Periodic progress output.
        if seen % 1000 == 0 {
            println!("{seen}");
        }
    }

    assert_eq!(68495, seen);
    println!("done");
    Ok(())
}
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399

use serdejsonbench::JsonIterator;

/// Streams `../../json/256MB.json` through `JsonIterator`, printing the
/// running item count every 1000 items and `done` at the end.
///
/// # Panics
///
/// Panics if any item is an error, if an item's `delta_mode` is not `"FULL"`,
/// or if the total number of items is not 68495.
fn main() -> std::io::Result<()> {
    let mut seen = 0;
    for record in JsonIterator::new(r#"../../json/256MB.json"#.into()) {
        let record = record.unwrap();
        assert_eq!("FULL", record.delta_mode);
        seen += 1;
        // Periodic progress output.
        if seen % 1000 == 0 {
            println!("{seen}");
        }
    }

    assert_eq!(68495, seen);
    println!("done");
    Ok(())
}
22 changes: 11 additions & 11 deletions rust/serdejsonbench/examples/stream_deserializer_api_example.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use serde_json::{Deserializer, Value};

/// Demonstrates `serde_json`'s stream deserializer on a string holding
/// several concatenated JSON values, printing each value on its own line.
///
/// # Panics
///
/// Panics on the first value that fails to deserialize.
fn main() {
    let input = "{\"k\": 3}1\"cool\"\"stuff\" 3{} [0, 1, 2]";

    Deserializer::from_str(input)
        .into_iter::<Value>()
        .map(|parsed| parsed.unwrap())
        .for_each(|value| println!("{}", value));
}
use serde_json::{Deserializer, Value};

/// Demonstrates `serde_json`'s stream deserializer on a string holding
/// several concatenated JSON values, printing each value on its own line.
///
/// # Panics
///
/// Panics on the first value that fails to deserialize.
fn main() {
    let input = "{\"k\": 3}1\"cool\"\"stuff\" 3{} [0, 1, 2]";

    Deserializer::from_str(input)
        .into_iter::<Value>()
        .map(|parsed| parsed.unwrap())
        .for_each(|value| println!("{}", value));
}
22 changes: 13 additions & 9 deletions rust/serdejsonbench/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399
mod types;
pub use types::Json;

mod serde_json_stream_deserializer;
pub use serde_json_stream_deserializer::JsonIterator;

mod serde_json_simple_deserializer;
pub use serde_json_simple_deserializer::{read_from_file, readone_from_file};
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399
mod types;
pub use types::Json;

mod serde_json_stream_deserializer;
pub use serde_json_stream_deserializer::JsonIterator;

mod serde_json_simple_deserializer;
pub use serde_json_simple_deserializer::{read_from_file, readone_from_file};

mod serde_json_stream_deserializer2;

pub use serde_json_stream_deserializer2::iter_json_array;
59 changes: 59 additions & 0 deletions rust/serdejsonbench/src/serde_json_stream_deserializer2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// https://github.com/serde-rs/json/issues/404#issuecomment-892957228
use serde::de::DeserializeOwned;
use serde_json::{self, Deserializer};
use std::io::{self, Read};

/// Reads bytes one at a time from `reader` and returns the first byte that is
/// not JSON whitespace.
///
/// Only the four whitespace characters permitted by the JSON grammar (space,
/// horizontal tab, line feed, carriage return) are skipped. Other ASCII
/// whitespace such as form feed is returned to the caller so that malformed
/// input is rejected rather than silently accepted.
///
/// # Errors
///
/// Propagates any error from `read_exact`, including the end-of-input error
/// raised when the reader is exhausted before a non-whitespace byte is found.
fn read_skipping_ws(mut reader: impl Read) -> io::Result<u8> {
    // One reusable single-byte buffer for the whole scan.
    let mut byte = 0u8;
    loop {
        reader.read_exact(std::slice::from_mut(&mut byte))?;
        if !matches!(byte, b' ' | b'\t' | b'\n' | b'\r') {
            return Ok(byte);
        }
    }
}

/// Builds an `io::Error` of kind `InvalidData` whose payload is `msg`.
fn invalid_data(msg: &str) -> io::Error {
    let kind = io::ErrorKind::InvalidData;
    io::Error::new(kind, msg)
}

/// Deserializes exactly one value of type `T` from `reader`.
///
/// A fresh `serde_json` stream deserializer is created over `reader` and only
/// its first item is taken.
///
/// # Errors
///
/// Returns an `InvalidData` error with the message `premature EOF` when the
/// stream yields no value, and converts any `serde_json` error into an
/// `io::Error` otherwise.
fn deserialize_single<T: DeserializeOwned, R: Read>(reader: R) -> io::Result<T> {
    let mut stream = Deserializer::from_reader(reader).into_iter::<T>();
    stream
        .next()
        .ok_or_else(|| invalid_data("premature EOF"))?
        .map_err(io::Error::from)
}

/// Produces the next element of a JSON array being read from `reader`.
///
/// `at_start` carries state between calls. Despite its name it is `false`
/// until the opening `[` has been processed and `true` afterwards; it is
/// flipped to `true` on the first call before anything is read.
///
/// Returns `Ok(Some(value))` for an element and `Ok(None)` once the closing
/// `]` is reached (including the empty-array case).
///
/// # Errors
///
/// Returns an `InvalidData` error when the first non-whitespace byte is not
/// `[`, or when an element is followed by something other than `,` or `]`.
/// Read errors and element deserialization errors are propagated.
fn yield_next_obj<T: DeserializeOwned, R: Read>(
    mut reader: R,
    at_start: &mut bool,
) -> io::Result<Option<T>> {
    // Already past the opening bracket: expect a separator or the end.
    if *at_start {
        return match read_skipping_ws(&mut reader)? {
            b']' => Ok(None),
            b',' => Ok(Some(deserialize_single(reader)?)),
            _ => Err(invalid_data("`,` or `]` not found")),
        };
    }

    *at_start = true;
    if read_skipping_ws(&mut reader)? != b'[' {
        return Err(invalid_data("`[` not found"));
    }

    // Look at the next byte to tell an empty array from a first element.
    let first = read_skipping_ws(&mut reader)?;
    if first == b']' {
        return Ok(None);
    }

    // The byte just consumed belongs to the first element, so put it back in
    // front of the remaining input before deserializing.
    let rejoined = io::Cursor::new([first]).chain(reader);
    Ok(Some(deserialize_single(rejoined)?))
}

/// Returns an iterator that lazily deserializes the elements of a top-level
/// JSON array read from `reader`, one element per call to `next`.
///
/// Each item is `Ok(value)` for a successfully deserialized element or
/// `Err(_)` for a read, framing, or deserialization failure. The iterator
/// ends when the closing `]` is reached.
///
/// Elements are read with single-byte reads between values, so callers will
/// generally want to pass a buffered reader.
///
/// NOTE(review): elements that are not self-delimiting (e.g. bare numbers)
/// may cause the per-element deserializer to consume the byte that follows
/// them; confirm against `serde_json`'s reader behavior before using this
/// with arrays of scalars.
pub fn iter_json_array<T: DeserializeOwned, R: Read>(
    mut reader: R,
) -> impl Iterator<Item = Result<T, io::Error>> {
    let mut started = false;
    std::iter::from_fn(move || match yield_next_obj(&mut reader, &mut started) {
        Ok(Some(item)) => Some(Ok(item)),
        Ok(None) => None,
        Err(err) => Some(Err(err)),
    })
}

0 comments on commit a0443a9

Please sign in to comment.