Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rust: add a new deserializer version #3

Merged
merged 13 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ on:
branches: [ "main" ]
pull_request:
branches: [ "*" ]
paths:
- '.github/**-java.yml'
- 'java/**'

jobs:
bench:
Expand All @@ -18,7 +15,10 @@ jobs:
- name: setup-files
run: ./uncompress.sh

- name: rust/benchmark
- name: rust/benchmark/v2
run: cd rust/serdejsonbench && cargo build -r --examples && ./target/release/examples/bench --console --times 10 --method v2

- name: rust/benchmark/v1
run: cd rust/serdejsonbench && cargo build -r --examples && ./target/release/examples/bench --console --times 10

- uses: actions/setup-dotnet@v3
Expand Down
106 changes: 70 additions & 36 deletions rust/serdejsonbench/examples/bench.rs
Original file line number Diff line number Diff line change
@@ -1,36 +1,70 @@
use serdejsonbench::JsonIterator;
use std::{io::Write, time::Instant};

/// Streams every record out of the 256MB fixture through `JsonIterator`,
/// checking each record's `delta_mode` and the total record count.
///
/// Panics if a record fails to deserialize or either assertion fails.
fn parse() {
    let mut seen = 0;
    for record in JsonIterator::new(r#"../../json/256MB.json"#.into()) {
        let record = record.unwrap();
        assert_eq!("FULL", record.delta_mode);
        seen += 1;
    }
    assert_eq!(68495, seen);
}

/// Benchmark entry point.
///
/// Expects exactly `--console --times <N>` and runs `parse` `N` times,
/// printing a progress dot per run followed by the total and average
/// wall-clock time in milliseconds.
///
/// # Panics
/// Panics when the arguments do not match the expected shape or when `<N>`
/// is not a valid `u32`.
fn main() {
    let args: Vec<String> = std::env::args().skip(1).collect();

    if args.len() == 3 && args[0] == "--console" && args[1] == "--times" {
        let now = Instant::now();
        let times: u32 = args[2]
            .parse()
            .unwrap_or_else(|e| panic!("bad --times value {:?}: {e}", args[2]));

        for _ in 0..times {
            print!(".");
            std::io::stdout().flush().unwrap();
            parse();
        }

        let elapsed = now.elapsed().as_millis();
        // `--times 0` runs nothing; report an average of 0 instead of
        // panicking on a division by zero.
        let avg = elapsed.checked_div(u128::from(times)).unwrap_or(0);
        println!("done in {elapsed}ms avg {avg}ms");
    } else {
        panic!("bad arguments {args:?}")
    }

    println!("done");
}
use serdejsonbench::JsonIterator;
use serdejsonbench::{iter_json_array, Json};
use std::fs::File;
use std::{io::Write, time::Instant};

use std::io::BufReader;

/// Streams every record out of the 256MB fixture through `iter_json_array`
/// on top of an 8 KiB `BufReader`, checking each record's `delta_mode` and
/// the total record count.
///
/// Panics if the file cannot be opened, a record fails to deserialize, or
/// either assertion fails.
fn parsev2() {
    let file = File::open(r#"../../json/256MB.json"#).unwrap();
    let reader = BufReader::with_capacity(8192, file);

    let mut seen = 0;
    for item in iter_json_array(reader) {
        let record: Json = item.unwrap();
        assert_eq!("FULL", record.delta_mode);
        seen += 1;
    }
    assert_eq!(68495, seen);
}

/// Streams every record out of the 256MB fixture through `JsonIterator`,
/// checking each record's `delta_mode` and the total record count.
///
/// Panics if a record fails to deserialize or either assertion fails.
fn parse() {
    let mut seen = 0;
    for record in JsonIterator::new(r#"../../json/256MB.json"#.into()) {
        let record = record.unwrap();
        assert_eq!("FULL", record.delta_mode);
        seen += 1;
    }
    assert_eq!(68495, seen);
}

/// Which deserializer implementation the benchmark should exercise.
enum Method {
    /// The original implementation; `main` maps this to `parse`.
    V1,
    /// The newer implementation; `main` maps this to `parsev2`.
    V2,
}

/// Benchmark entry point.
///
/// Expects `--console --times <N>` optionally followed by `--method v2`.
/// Runs the selected parser `N` times, printing a progress dot per run
/// followed by the total and average wall-clock time in milliseconds.
/// Anything other than a trailing `--method v2` selects the v1 parser.
///
/// # Panics
/// Panics when the leading arguments do not match the expected shape or when
/// `<N>` is not a valid `u32`.
fn main() {
    let args: Vec<String> = std::env::args().skip(1).collect();

    if args.len() > 2 && args[0] == "--console" && args[1] == "--times" {
        let now = Instant::now();
        let times: u32 = args[2]
            .parse()
            .unwrap_or_else(|e| panic!("bad --times value {:?}: {e}", args[2]));

        let method = if args.len() > 4 && args[3] == "--method" && args[4] == "v2" {
            println!("v2 version");
            Method::V2
        } else {
            println!("v1 version");
            Method::V1
        };

        // Both parsers share the `fn()` shape, so pick one up front and keep
        // the timing loop free of branching.
        let run: fn() = match method {
            Method::V1 => parse,
            Method::V2 => parsev2,
        };

        for _ in 0..times {
            print!(".");
            std::io::stdout().flush().unwrap();
            run();
        }

        let elapsed = now.elapsed().as_millis();
        // `--times 0` runs nothing; report an average of 0 instead of
        // panicking on a division by zero.
        let avg = elapsed.checked_div(u128::from(times)).unwrap_or(0);
        println!("done in {elapsed}ms avg {avg}ms");
    } else {
        panic!("bad arguments {args:?}")
    }

    println!("done");
}
42 changes: 20 additions & 22 deletions rust/serdejsonbench/examples/serde_json_stream_deserializer.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399

use serdejsonbench::JsonIterator;

/// Walks the 256MB fixture with `JsonIterator`, printing the running count
/// every 1000 records and checking the final total.
///
/// Panics if a record fails to deserialize or either assertion fails.
fn main() -> std::io::Result<()> {
    let mut seen = 0;
    for record in JsonIterator::new(r#"../../json/256MB.json"#.into()) {
        let record = record.unwrap();
        assert_eq!("FULL", record.delta_mode);

        seen += 1;
        if seen % 1000 == 0 {
            println!("{seen}");
        }
    }

    assert_eq!(68495, seen);
    println!("done");
    Ok(())
}
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399

use serdejsonbench::JsonIterator;

/// Walks the 256MB fixture with `JsonIterator`, printing the running count
/// every 1000 records and checking the final total.
///
/// Panics if a record fails to deserialize or either assertion fails.
fn main() -> std::io::Result<()> {
    let mut seen = 0;
    for record in JsonIterator::new(r#"../../json/256MB.json"#.into()) {
        let record = record.unwrap();
        assert_eq!("FULL", record.delta_mode);

        seen += 1;
        if seen % 1000 == 0 {
            println!("{seen}");
        }
    }

    assert_eq!(68495, seen);
    println!("done");
    Ok(())
}
22 changes: 11 additions & 11 deletions rust/serdejsonbench/examples/stream_deserializer_api_example.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use serde_json::{Deserializer, Value};
/// Prints each JSON value found in a string of back-to-back JSON documents,
/// one per line.
///
/// Panics if any value fails to parse.
fn main() {
    let data = "{\"k\": 3}1\"cool\"\"stuff\" 3{} [0, 1, 2]";
    Deserializer::from_str(data)
        .into_iter::<Value>()
        .for_each(|value| println!("{}", value.unwrap()));
}
use serde_json::{Deserializer, Value};

/// Prints each JSON value found in a string of back-to-back JSON documents,
/// one per line.
///
/// Panics if any value fails to parse.
fn main() {
    let data = "{\"k\": 3}1\"cool\"\"stuff\" 3{} [0, 1, 2]";
    Deserializer::from_str(data)
        .into_iter::<Value>()
        .for_each(|value| println!("{}", value.unwrap()));
}
22 changes: 13 additions & 9 deletions rust/serdejsonbench/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399
mod types;
pub use types::Json;

mod serde_json_stream_deserializer;
pub use serde_json_stream_deserializer::JsonIterator;

mod serde_json_simple_deserializer;
pub use serde_json_simple_deserializer::{read_from_file, readone_from_file};
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399
mod types;
pub use types::Json;

mod serde_json_stream_deserializer;
pub use serde_json_stream_deserializer::JsonIterator;

mod serde_json_simple_deserializer;
pub use serde_json_simple_deserializer::{read_from_file, readone_from_file};

mod serde_json_stream_deserializer2;

pub use serde_json_stream_deserializer2::iter_json_array;
59 changes: 59 additions & 0 deletions rust/serdejsonbench/src/serde_json_stream_deserializer2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// https://github.com/serde-rs/json/issues/404#issuecomment-892957228
use serde::de::DeserializeOwned;
use serde_json::{self, Deserializer};
use std::io::{self, Read};

/// Reads single bytes from `reader` until one that is not JSON whitespace
/// appears, and returns that byte.
///
/// Only the four whitespace bytes the JSON grammar allows are skipped:
/// space, `\t`, `\n` and `\r`. Other ASCII whitespace such as form feed is
/// returned to the caller so it is rejected like any other unexpected byte.
///
/// # Errors
/// Propagates any error from `read_exact`, including
/// `io::ErrorKind::UnexpectedEof` when the input ends before a
/// non-whitespace byte is found.
fn read_skipping_ws(mut reader: impl Read) -> io::Result<u8> {
    loop {
        let mut byte = 0u8;
        reader.read_exact(std::slice::from_mut(&mut byte))?;
        if !matches!(byte, b' ' | b'\t' | b'\n' | b'\r') {
            return Ok(byte);
        }
    }
}

/// Wraps `msg` in an `io::Error` of kind `InvalidData`.
fn invalid_data(msg: &str) -> io::Error {
    let kind = io::ErrorKind::InvalidData;
    io::Error::new(kind, msg)
}

/// Deserializes exactly one `T` from the front of `reader`.
///
/// # Errors
/// Returns an `InvalidData` error with the message "premature EOF" when the
/// stream yields no value, and converts any `serde_json` error into an
/// `io::Error`.
fn deserialize_single<T: DeserializeOwned, R: Read>(reader: R) -> io::Result<T> {
    let mut stream = Deserializer::from_reader(reader).into_iter::<T>();
    stream
        .next()
        .ok_or_else(|| invalid_data("premature EOF"))?
        .map_err(io::Error::from)
}

/// Produces the next element of a JSON array read from `reader`, or `None`
/// once the closing `]` has been consumed.
///
/// Note that `at_start` reads inverted relative to its name: while it is
/// `false` the function still expects the opening `[`, and it is flipped to
/// `true` before that bracket is read. Every later call expects a `,`
/// (followed by an element) or the closing `]`.
///
/// # Errors
/// Returns `InvalidData` when the expected `[`, `,` or `]` is missing, and
/// propagates read and deserialization errors.
fn yield_next_obj<T: DeserializeOwned, R: Read>(
    mut reader: R,
    at_start: &mut bool,
) -> io::Result<Option<T>> {
    // Array already opened: the next token separates elements or ends the array.
    if *at_start {
        return match read_skipping_ws(&mut reader)? {
            b']' => Ok(None),
            b',' => deserialize_single(reader).map(Some),
            _ => Err(invalid_data("`,` or `]` not found")),
        };
    }

    *at_start = true;
    if read_skipping_ws(&mut reader)? != b'[' {
        return Err(invalid_data("`[` not found"));
    }

    // Look one byte ahead to tell an empty array from a first element.
    match read_skipping_ws(&mut reader)? {
        b']' => Ok(None),
        first => {
            // The byte belongs to the first element, so put it back in front
            // of the remaining input before deserializing.
            let prefixed = io::Cursor::new([first]).chain(reader);
            deserialize_single(prefixed).map(Some)
        }
    }
}

/// Lazily iterates over the elements of a top-level JSON array read from
/// `reader`, deserializing each element into `T` as it is requested.
///
/// Each item is `Ok(T)` for a parsed element or `Err` for a read, syntax or
/// deserialization failure; iteration yields `None` when the closing `]` is
/// reached.
pub fn iter_json_array<T: DeserializeOwned, R: Read>(
    mut reader: R,
) -> impl Iterator<Item = Result<T, io::Error>> {
    // Passed to `yield_next_obj` as its `at_start` flag, which is `false`
    // until the opening `[` has been handled.
    let mut opened = false;
    std::iter::from_fn(move || {
        let step: io::Result<Option<T>> = yield_next_obj(&mut reader, &mut opened);
        step.transpose()
    })
}