-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rust: add a new deserializer version (#3)
- Loading branch information
1 parent
a847c1e
commit a0443a9
Showing
6 changed files
with
177 additions
and
82 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,70 @@ | ||
use serdejsonbench::JsonIterator; | ||
use std::{io::Write, time::Instant}; | ||
|
||
fn parse() { | ||
let iter = JsonIterator::new(r#"../../json/256MB.json"#.into()); | ||
let mut count = 0; | ||
for json in iter { | ||
let json = json.unwrap(); | ||
assert_eq!("FULL", json.delta_mode); | ||
count = count + 1; | ||
} | ||
assert_eq!(68495, count); | ||
} | ||
|
||
fn main() { | ||
let args: Vec<String> = std::env::args().skip(1).collect(); | ||
|
||
if args.len() == 3 && args[0] == "--console" && args[1] == "--times" { | ||
let now = Instant::now(); | ||
let times: u32 = args[2].as_str().parse().unwrap(); | ||
|
||
for _ in 0..times { | ||
print!("."); | ||
std::io::stdout().flush().unwrap(); | ||
parse(); | ||
} | ||
|
||
let elapsed = now.elapsed().as_millis(); | ||
let avg = elapsed / u128::from(times); | ||
println!("done in {elapsed}ms avg {avg}ms"); | ||
} else { | ||
panic!("bad arguments {args:?}") | ||
} | ||
|
||
println!("done"); | ||
} | ||
use serdejsonbench::JsonIterator; | ||
use serdejsonbench::{iter_json_array, Json}; | ||
use std::fs::File; | ||
use std::{io::Write, time::Instant}; | ||
|
||
use std::io::BufReader; | ||
|
||
fn parsev2() { | ||
let reader = BufReader::with_capacity(8192, File::open(r#"../../json/256MB.json"#).unwrap()); | ||
let iter = iter_json_array(reader); | ||
let mut count = 0; | ||
for json in iter { | ||
let json: Json = json.unwrap(); | ||
assert_eq!("FULL", json.delta_mode); | ||
count = count + 1; | ||
} | ||
assert_eq!(68495, count); | ||
} | ||
|
||
fn parse() { | ||
let iter = JsonIterator::new(r#"../../json/256MB.json"#.into()); | ||
let mut count = 0; | ||
for json in iter { | ||
let json = json.unwrap(); | ||
assert_eq!("FULL", json.delta_mode); | ||
count = count + 1; | ||
} | ||
assert_eq!(68495, count); | ||
} | ||
|
||
/// Which deserializer implementation the benchmark should exercise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Method {
    /// The `JsonIterator`-based pass (`parse`).
    V1,
    /// The `iter_json_array`-based pass (`parsev2`).
    V2,
}
|
||
fn main() { | ||
let args: Vec<String> = std::env::args().skip(1).collect(); | ||
|
||
if args.len() > 2 && args[0] == "--console" && args[1] == "--times" { | ||
let now = Instant::now(); | ||
let times: u32 = args[2].as_str().parse().unwrap(); | ||
|
||
let method = if args.len() > 4 && args[3] == "--method" && args[4] == "v2" { | ||
println!("v2 version"); | ||
Method::V2 | ||
} else { | ||
println!("v1 version"); | ||
Method::V1 | ||
}; | ||
|
||
let parse = match method { | ||
Method::V1 => parse, | ||
Method::V2 => parsev2, | ||
}; | ||
|
||
for _ in 0..times { | ||
print!("."); | ||
std::io::stdout().flush().unwrap(); | ||
parse(); | ||
} | ||
|
||
let elapsed = now.elapsed().as_millis(); | ||
let avg = elapsed / u128::from(times); | ||
println!("done in {elapsed}ms avg {avg}ms"); | ||
} else { | ||
panic!("bad arguments {args:?}") | ||
} | ||
|
||
println!("done"); | ||
} |
42 changes: 20 additions & 22 deletions
42
rust/serdejsonbench/examples/serde_json_stream_deserializer.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,20 @@ | ||
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399 | ||
|
||
use serdejsonbench::JsonIterator; | ||
|
||
fn main() -> std::io::Result<()> { | ||
let iter = JsonIterator::new( | ||
r#"../../json/256MB.json"#.into(), | ||
); | ||
let mut count = 0; | ||
for json in iter { | ||
let json = json.unwrap(); | ||
assert_eq!("FULL", json.delta_mode); | ||
count = count + 1; | ||
if count % 1000 == 0 { | ||
println!("{count}"); | ||
} | ||
} | ||
|
||
assert_eq!(68495, count); | ||
println!("done"); | ||
Ok(()) | ||
} | ||
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399 | ||
|
||
use serdejsonbench::JsonIterator; | ||
|
||
fn main() -> std::io::Result<()> { | ||
let iter = JsonIterator::new(r#"../../json/256MB.json"#.into()); | ||
let mut count = 0; | ||
for json in iter { | ||
let json = json.unwrap(); | ||
assert_eq!("FULL", json.delta_mode); | ||
count = count + 1; | ||
if count % 1000 == 0 { | ||
println!("{count}"); | ||
} | ||
} | ||
|
||
assert_eq!(68495, count); | ||
println!("done"); | ||
Ok(()) | ||
} |
22 changes: 11 additions & 11 deletions
22
rust/serdejsonbench/examples/stream_deserializer_api_example.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,11 @@ | ||
use serde_json::{Deserializer, Value}; | ||
|
||
fn main() { | ||
let data = "{\"k\": 3}1\"cool\"\"stuff\" 3{} [0, 1, 2]"; | ||
|
||
let stream = Deserializer::from_str(data).into_iter::<Value>(); | ||
|
||
for value in stream { | ||
println!("{}", value.unwrap()); | ||
} | ||
} | ||
use serde_json::{Deserializer, Value}; | ||
|
||
fn main() { | ||
let data = "{\"k\": 3}1\"cool\"\"stuff\" 3{} [0, 1, 2]"; | ||
|
||
let stream = Deserializer::from_str(data).into_iter::<Value>(); | ||
|
||
for value in stream { | ||
println!("{}", value.unwrap()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399 | ||
mod types; | ||
pub use types::Json; | ||
|
||
mod serde_json_stream_deserializer; | ||
pub use serde_json_stream_deserializer::JsonIterator; | ||
|
||
mod serde_json_simple_deserializer; | ||
pub use serde_json_simple_deserializer::{read_from_file, readone_from_file}; | ||
//https://github.com/serde-rs/json/issues/404#issuecomment-674293399 | ||
mod types; | ||
pub use types::Json; | ||
|
||
mod serde_json_stream_deserializer; | ||
pub use serde_json_stream_deserializer::JsonIterator; | ||
|
||
mod serde_json_simple_deserializer; | ||
pub use serde_json_simple_deserializer::{read_from_file, readone_from_file}; | ||
|
||
mod serde_json_stream_deserializer2; | ||
|
||
pub use serde_json_stream_deserializer2::iter_json_array; |
59 changes: 59 additions & 0 deletions
59
rust/serdejsonbench/src/serde_json_stream_deserializer2.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// https://github.com/serde-rs/json/issues/404#issuecomment-892957228 | ||
use serde::de::DeserializeOwned; | ||
use serde_json::{self, Deserializer}; | ||
use std::io::{self, Read}; | ||
|
||
/// Reads single bytes from `reader` until one that is not JSON whitespace is
/// found, and returns that byte.
///
/// Only the four whitespace bytes permitted by the JSON grammar are skipped:
/// space, horizontal tab, line feed and carriage return. Any other byte,
/// including form feed, is returned to the caller so that it is rejected as
/// invalid input rather than silently ignored.
///
/// # Errors
/// Propagates any error from `read_exact`, which includes `UnexpectedEof`
/// when the input ends before a non-whitespace byte is seen.
fn read_skipping_ws(mut reader: impl Read) -> io::Result<u8> {
    let mut byte = 0u8;
    loop {
        reader.read_exact(std::slice::from_mut(&mut byte))?;
        if !matches!(byte, b' ' | b'\t' | b'\n' | b'\r') {
            return Ok(byte);
        }
    }
}
|
||
/// Builds an `io::Error` of kind `InvalidData` whose message is `msg`.
fn invalid_data(msg: &str) -> io::Error {
    let kind = io::ErrorKind::InvalidData;
    io::Error::new(kind, msg)
}
|
||
fn deserialize_single<T: DeserializeOwned, R: Read>(reader: R) -> io::Result<T> { | ||
let next_obj = Deserializer::from_reader(reader).into_iter::<T>().next(); | ||
match next_obj { | ||
Some(result) => result.map_err(Into::into), | ||
None => Err(invalid_data("premature EOF")), | ||
} | ||
} | ||
|
||
fn yield_next_obj<T: DeserializeOwned, R: Read>( | ||
mut reader: R, | ||
at_start: &mut bool, | ||
) -> io::Result<Option<T>> { | ||
if !*at_start { | ||
*at_start = true; | ||
if read_skipping_ws(&mut reader)? == b'[' { | ||
// read the next char to see if the array is empty | ||
let peek = read_skipping_ws(&mut reader)?; | ||
if peek == b']' { | ||
Ok(None) | ||
} else { | ||
deserialize_single(io::Cursor::new([peek]).chain(reader)).map(Some) | ||
} | ||
} else { | ||
Err(invalid_data("`[` not found")) | ||
} | ||
} else { | ||
match read_skipping_ws(&mut reader)? { | ||
b',' => deserialize_single(reader).map(Some), | ||
b']' => Ok(None), | ||
_ => Err(invalid_data("`,` or `]` not found")), | ||
} | ||
} | ||
} | ||
|
||
pub fn iter_json_array<T: DeserializeOwned, R: Read>( | ||
mut reader: R, | ||
) -> impl Iterator<Item = Result<T, io::Error>> { | ||
let mut at_start = false; | ||
std::iter::from_fn(move || yield_next_obj(&mut reader, &mut at_start).transpose()) | ||
} |