From 0028ddf91ae793b905621df17bd87e7429e07c48 Mon Sep 17 00:00:00 2001 From: Szabolcs Berecz Date: Sun, 13 May 2018 18:49:16 +0200 Subject: [PATCH 1/4] Simplify header tests --- tests/header.rs | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/tests/header.rs b/tests/header.rs index 8100814..48c2b19 100644 --- a/tests/header.rs +++ b/tests/header.rs @@ -2,18 +2,22 @@ extern crate sleep_parser; use sleep_parser::*; use std::fs::File; -use std::io::{BufRead, BufReader}; +use std::io::Read; #[test] fn new_header() { Header::new(FileType::Tree, 40, HashType::BLAKE2b); } +fn read_header_bytes(file_name: &str) -> Result<[u8; 32], std::io::Error> { + let mut file = File::open(file_name)?; + let mut buffer = [0u8; 32]; + file.read_exact(&mut buffer).map(|_| buffer) +} + #[test] fn from_vec_content_bitfield() { - let file = File::open("tests/fixtures/content.bitfield").unwrap(); - let mut reader = BufReader::with_capacity(32, file); - let buffer = reader.fill_buf().unwrap(); + let buffer = read_header_bytes("tests/fixtures/content.bitfield").unwrap(); let header = Header::from_vec(&buffer).unwrap(); assert!(header.is_bitfield()); assert_eq!(header.to_vec(), buffer); @@ -21,9 +25,7 @@ fn from_vec_content_bitfield() { #[test] fn from_vec_content_signatures() { - let file = File::open("tests/fixtures/content.signatures").unwrap(); - let mut reader = BufReader::with_capacity(32, file); - let buffer = reader.fill_buf().unwrap(); + let buffer = read_header_bytes("tests/fixtures/content.signatures").unwrap(); let header = Header::from_vec(&buffer).unwrap(); assert!(header.is_signatures()); assert_eq!(header.to_vec(), buffer); @@ -31,9 +33,7 @@ fn from_vec_content_signatures() { #[test] fn from_vec_content_tree() { - let file = File::open("tests/fixtures/content.tree").unwrap(); - let mut reader = BufReader::with_capacity(32, file); - let buffer = reader.fill_buf().unwrap(); + let buffer = 
read_header_bytes("tests/fixtures/content.tree").unwrap(); let header = Header::from_vec(&buffer).unwrap(); assert!(header.is_tree()); assert_eq!(header.to_vec(), buffer); @@ -41,9 +41,7 @@ fn from_vec_content_tree() { #[test] fn from_vec_metadata_bitfield() { - let file = File::open("tests/fixtures/metadata.bitfield").unwrap(); - let mut reader = BufReader::with_capacity(32, file); - let buffer = reader.fill_buf().unwrap(); + let buffer = read_header_bytes("tests/fixtures/metadata.bitfield").unwrap(); let header = Header::from_vec(&buffer).unwrap(); assert!(header.is_bitfield()); assert_eq!(header.to_vec(), buffer); @@ -51,9 +49,7 @@ fn from_vec_metadata_bitfield() { #[test] fn from_vec_metadata_signatures() { - let file = File::open("tests/fixtures/metadata.signatures").unwrap(); - let mut reader = BufReader::with_capacity(32, file); - let buffer = reader.fill_buf().unwrap(); + let buffer = read_header_bytes("tests/fixtures/metadata.signatures").unwrap(); let header = Header::from_vec(&buffer).unwrap(); assert!(header.is_signatures()); assert_eq!(header.to_vec(), buffer); @@ -61,9 +57,7 @@ fn from_vec_metadata_signatures() { #[test] fn from_vec_metadata_tree() { - let file = File::open("tests/fixtures/metadata.tree").unwrap(); - let mut reader = BufReader::with_capacity(32, file); - let buffer = reader.fill_buf().unwrap(); + let buffer = read_header_bytes("tests/fixtures/metadata.tree").unwrap(); let header = Header::from_vec(&buffer).unwrap(); assert!(header.is_tree()); assert_eq!(header.to_vec(), buffer); From 7f0fe2b1bc577d6b44a231007202fa342cac8145 Mon Sep 17 00:00:00 2001 From: Szabolcs Berecz Date: Sun, 13 May 2018 19:02:37 +0200 Subject: [PATCH 2/4] Fix typo --- src/header.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/header.rs b/src/header.rs index 647457b..92b2f65 100644 --- a/src/header.rs +++ b/src/header.rs @@ -76,7 +76,7 @@ impl Header { } } - /// Parse a 32 bit buffer slice into a valid Header. 
+ /// Parse a 32 byte buffer slice into a valid Header. pub fn from_vec(buffer: &[u8]) -> Result<Header, Error> { ensure!(buffer.len() == 32, "buffer should be 32 bytes"); From c718841be13cc496561e28ed7a0ac41859e3ce57 Mon Sep 17 00:00:00 2001 From: Szabolcs Berecz Date: Mon, 14 May 2018 10:55:09 +0200 Subject: [PATCH 3/4] Add nom based parser --- Cargo.toml | 4 ++ benches/parser.rs | 19 ++++++ src/header.rs | 44 +++++++++++++- src/lib.rs | 3 + src/parsers.rs | 146 ++++++++++++++++++++++++++++++++++++++++++++++ tests/header.rs | 11 ++++ 6 files changed, 225 insertions(+), 2 deletions(-) create mode 100644 benches/parser.rs create mode 100644 src/parsers.rs diff --git a/Cargo.toml b/Cargo.toml index 9dcaa0b..1713f3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,7 @@ readme = "README.md" [dependencies] failure = "0.1.1" byteorder = "1.2.1" + +[dependencies.nom] +version = "~4.0.0" +features = ["verbose-errors"] diff --git a/benches/parser.rs b/benches/parser.rs new file mode 100644 index 0000000..bd11c5c --- /dev/null +++ b/benches/parser.rs @@ -0,0 +1,19 @@ +#![feature(test)] +extern crate test; + +extern crate sleep_parser; + +use sleep_parser::Header; +use test::Bencher; + +const HEADER: &[u8; 32] = b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; + +#[bench] +fn hand_rolled(b: &mut Bencher) { + b.iter(|| Header::from_vec(HEADER)); +} + +#[bench] +fn nom(b: &mut Bencher) { + b.iter(|| Header::from_bytes(HEADER)); +} diff --git a/src/header.rs b/src/header.rs index 92b2f65..e96aea0 100644 --- a/src/header.rs +++ b/src/header.rs @@ -2,6 +2,8 @@ extern crate byteorder; use self::byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use failure::Error; +use nom; +use parsers; use std::io::Cursor; /// Algorithm used for hashing the data. @@ -42,14 +44,14 @@ pub enum FileType { } /// SLEEP Protocol version.
-#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ProtocolVersion { /// The version specified as per the paper released in 2017-09. V0, } /// Structural representation of 32 byte SLEEP headers. -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct Header { /// Type of file. pub file_type: FileType, @@ -76,6 +78,11 @@ impl Header { } } + /// Parses a 32 byte buffer slice into a valid Header. + pub fn from_bytes(buf: &[u8]) -> Result<Header, Error> { + convert_nom_result(buf, parsers::header(buf)) + } + /// Parse a 32 byte buffer slice into a valid Header. pub fn from_vec(buffer: &[u8]) -> Result<Header, Error> { ensure!(buffer.len() == 32, "buffer should be 32 bytes"); @@ -215,3 +222,36 @@ impl Header { && self.hash_type == HashType::BLAKE2b } } + +fn convert_nom_result( + buf: &[u8], + result: Result<(&[u8], Header), nom::Err<&[u8]>>, +) -> Result<Header, Error> { + match result { + Ok((&[], h)) => Ok(h), + Ok((remaining, _)) => { + assert!( + buf.len() > parsers::HEADER_LENGTH, + "broken parser: input length is {}, but got unparsed input of length {}", + buf.len(), + remaining.len() + ); + Err(format_err!("input must be {} bytes", parsers::HEADER_LENGTH)) + } + Err(e @ nom::Err::Incomplete(_)) => { + assert!( + buf.len() < parsers::HEADER_LENGTH, + "broken parser: input length is {}, but got error: {:?}", + buf.len(), + e + ); + Err(format_err!("input must be {} bytes", parsers::HEADER_LENGTH)) + } + Err(nom::Err::Error(context)) => { + Err(format_err!("nom error: {:?}", context.into_error_kind())) + } + Err(nom::Err::Failure(context)) => { + Err(format_err!("nom failure: {:?}", context.into_error_kind())) + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 7f75cdf..b08c1c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,8 +5,11 @@ #[macro_use] extern crate failure; +#[macro_use] +extern crate nom; mod header; +mod parsers; pub use header::*; diff --git a/src/parsers.rs b/src/parsers.rs new file mode 100644 index 0000000..a39e361 --- /dev/null +++ b/src/parsers.rs @@ -0,0 +1,146 @@
+#![cfg_attr(feature = "cargo-clippy", allow(clippy))] + +use header::*; +use nom::{be_u16, be_u8, rest}; +use std::str; + +pub(crate) const HEADER_LENGTH: usize = 32; +const VERIFY_TRAILING_ZEROS: bool = true; + +named!( + file_type<FileType>, + switch!(be_u8, + 0 => value!(FileType::BitField) | + 1 => value!(FileType::Signatures) | + 2 => value!(FileType::Tree) + ) +); + +named!( + protocol_version<ProtocolVersion>, + switch!(be_u8, + 0 => value!(ProtocolVersion::V0) + ) +); + +named_args!( + algorithm(len: u8)<HashType>, + switch!(map_res!(take!(len), str::from_utf8), + "BLAKE2b" => value!(HashType::BLAKE2b) | + "Ed25519" => value!(HashType::Ed25519) | + "" => value!(HashType::None) + ) +); + +named!( + pub header<Header>
, + flat_map!( + take!(HEADER_LENGTH), + do_parse!( + tag!(b"\x05\x02\x57") >> + file_type: file_type >> + protocol_version: protocol_version >> + entry_size: be_u16 >> + + algorithm_len: verify!(be_u8, |len: u8| len <= HEADER_LENGTH as u8 - 8) >> + algorithm: apply!(algorithm, algorithm_len) >> + + verify!(rest, |bytes: &[u8]| { + let header_consumed = bytes.len() + algorithm_len as usize + 8 == HEADER_LENGTH; + let trailing_zeros = !VERIFY_TRAILING_ZEROS || bytes.iter().all(|&b| b == 0u8); + header_consumed && trailing_zeros + }) >> + + (Header { + file_type, + protocol_version, + entry_size, + hash_type: algorithm, + }) + ) + ) +); + +#[cfg(test)] +mod test { + use super::*; + + use nom; + + #[test] + fn parse_file_type() { + assert_eq!( + file_type(b"\x00"), + Ok((&[][..], FileType::BitField)) + ); + assert_eq!( + file_type(b"\x01"), + Ok((&[][..], FileType::Signatures)) + ); + assert_eq!( + file_type(b"\x02"), + Ok((&[][..], FileType::Tree)) + ); + assert!(file_type(b"\xff").is_err()); + } + + #[test] + fn parse_header() { + fn mk_header(prefix: &[u8]) -> [u8; 32] { + let mut h = [0u8; 32]; + h[0..prefix.len()].clone_from_slice(prefix); + h + } + + assert_eq!( + header(&mk_header( + b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b" + )), + Ok(( + &[][..], + Header { + file_type: FileType::Signatures, + protocol_version: ProtocolVersion::V0, + entry_size: 40, + hash_type: HashType::BLAKE2b + } + )) + ); + assert_eq!( + header(&mk_header( + b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b" + )).unwrap() + .1 + .hash_type, + HashType::BLAKE2b + ); + assert_eq!( + header(&mk_header( + b"\x05\x02W\x01\x00\x00\x28\x07Ed25519" + )).unwrap() + .1 + .hash_type, + HashType::Ed25519 + ); + assert_eq!( + header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x00")) + .unwrap() + .1 + .hash_type, + HashType::None + ); + assert!(header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x01B")).is_err()); + assert!(header(&mk_header(b"\x05\x02W\x01\x00\x00\x28\x01B")).is_err()); + + let h = 
b"\x05\x02W\x01\x00\x00\x28\x19BLAKE2bXXXXXXXXXXXXXXXXXX"; + assert!(header(h).is_err()); + } + + #[test] + fn invalid_algorithm_len() { + match header(b"\x05\x02W\x00\x00\x00\x00\xFF\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00") { + Err(nom::Err::Error(nom::Context::Code(_, nom::ErrorKind::Verify))) => (), + x => panic!("{:?}", x), + } + } +} diff --git a/tests/header.rs b/tests/header.rs index 48c2b19..1569438 100644 --- a/tests/header.rs +++ b/tests/header.rs @@ -74,3 +74,14 @@ fn to_vec() { ] ); } + +#[test] +fn issue_3() { + // https://github.com/datrs/sleep-parser/issues/3 + + let data = b"\x05\x02W\x01\x00\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xfb\x03p\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xb0\xbb9\xb0\xf5\xf5"; + assert!(Header::from_bytes(data).is_err()); + + let data = b"\x05\x02W\x01\x00\x00\x00\x12\x12\x12\x00\x00S\xc3\xcf\x8a2\xcc\xd1\xce9\xc4K\x9343\x00602\xb5\x07"; + assert!(Header::from_bytes(data).is_err()); +} From 344da315266410d177729928af46543ce74e8b2d Mon Sep 17 00:00:00 2001 From: Szabolcs Berecz Date: Tue, 15 May 2018 22:10:14 +0200 Subject: [PATCH 4/4] Switch over to the nom based parser. 
Fixes #3 --- benches/parser.rs | 7 +--- src/header.rs | 84 ++--------------------------------------------- tests/header.rs | 24 +++++++------- 3 files changed, 16 insertions(+), 99 deletions(-) diff --git a/benches/parser.rs b/benches/parser.rs index bd11c5c..2f451b0 100644 --- a/benches/parser.rs +++ b/benches/parser.rs @@ -9,11 +9,6 @@ use test::Bencher; const HEADER: &[u8; 32] = b"\x05\x02W\x01\x00\x00\x28\x07BLAKE2b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; #[bench] -fn hand_rolled(b: &mut Bencher) { - b.iter(|| Header::from_vec(HEADER)); -} - -#[bench] -fn nom(b: &mut Bencher) { +fn header_parsing(b: &mut Bencher) { b.iter(|| Header::from_bytes(HEADER)); } diff --git a/src/header.rs b/src/header.rs index e96aea0..ce22e5c 100644 --- a/src/header.rs +++ b/src/header.rs @@ -1,10 +1,9 @@ extern crate byteorder; -use self::byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use self::byteorder::{BigEndian, WriteBytesExt}; use failure::Error; use nom; use parsers; -use std::io::Cursor; /// Algorithm used for hashing the data. #[derive(Debug, PartialEq)] @@ -84,86 +83,9 @@ impl Header { } /// Parse a 32 byte buffer slice into a valid Header. 
+ #[deprecated(note = "Use from_bytes")] pub fn from_vec(buffer: &[u8]) -> Result<Header, Error> { - ensure!(buffer.len() == 32, "buffer should be 32 bytes"); - - let mut rdr = Cursor::new(buffer); - let byte = rdr.read_u8().unwrap(); - ensure!( - byte == 5, - format!( - "The first byte of a SLEEP header should be '5', found {}", - byte - ) - ); - - let byte = rdr.read_u8().unwrap(); - ensure!( - byte == 2, - format!( - "The second byte of a SLEEP header should be '2', found {}", - byte - ) - ); - - let byte = rdr.read_u8().unwrap(); - ensure!( - byte == 87, - format!( - "The third byte of a SLEEP header should be '87', found {}", - byte - ) - ); - - let file_type = match rdr.read_u8().unwrap() { - 0 => FileType::BitField, - 1 => FileType::Signatures, - 2 => FileType::Tree, - num => bail!(format!( - "The fourth byte '{}' does not belong to any known SLEEP file type", - num - )), - }; - - let protocol_version = match rdr.read_u8().unwrap() { - 0 => ProtocolVersion::V0, - num => bail!(format!( - "The fifth byte '{}' does not belong to any known SLEEP protocol protocol_version", - num - )), - }; - - // Read entry size which will inform how many bytes to read next. - let entry_size = rdr.read_u16::<BigEndian>().unwrap(); - - // Read out the "entry_size" bytes into a string. - // NOTE(yw): there should be a more concise way of doing this. - let hash_name_len = rdr.read_u8().unwrap() as usize; - let current = rdr.position() as usize; - - let hash_name_upper = current + hash_name_len; - let buf_slice = &buffer[current..hash_name_upper]; - rdr.set_position(hash_name_upper as u64 + 1); - let algo = ::std::str::from_utf8(buf_slice) - .expect("The algorithm string was invalid utf8 encoded"); - - let hash_type = match algo { - "BLAKE2b" => HashType::BLAKE2b, - "Ed25519" => HashType::Ed25519, - _ => HashType::None, - }; - - for index in rdr.position()..32 { - let byte = rdr.read_u8().unwrap(); - ensure!(byte == 0, format!("The remainder of the header should be zero-filled.
Found byte '{}' at position '{}'.", byte, index)); - } - - Ok(Header { - protocol_version, - entry_size, - file_type, - hash_type, - }) + Header::from_bytes(buffer) } /// Convert a `Header` into a `Vec`. Use this to persist a header back to diff --git a/tests/header.rs b/tests/header.rs index 1569438..8bc7574 100644 --- a/tests/header.rs +++ b/tests/header.rs @@ -16,49 +16,49 @@ fn read_header_bytes(file_name: &str) -> Result<[u8; 32], std::io::Error> { } #[test] -fn from_vec_content_bitfield() { +fn from_bytes_content_bitfield() { let buffer = read_header_bytes("tests/fixtures/content.bitfield").unwrap(); - let header = Header::from_vec(&buffer).unwrap(); + let header = Header::from_bytes(&buffer).unwrap(); assert!(header.is_bitfield()); assert_eq!(header.to_vec(), buffer); } #[test] -fn from_vec_content_signatures() { +fn from_bytes_content_signatures() { let buffer = read_header_bytes("tests/fixtures/content.signatures").unwrap(); - let header = Header::from_vec(&buffer).unwrap(); + let header = Header::from_bytes(&buffer).unwrap(); assert!(header.is_signatures()); assert_eq!(header.to_vec(), buffer); } #[test] -fn from_vec_content_tree() { +fn from_bytes_content_tree() { let buffer = read_header_bytes("tests/fixtures/content.tree").unwrap(); - let header = Header::from_vec(&buffer).unwrap(); + let header = Header::from_bytes(&buffer).unwrap(); assert!(header.is_tree()); assert_eq!(header.to_vec(), buffer); } #[test] -fn from_vec_metadata_bitfield() { +fn from_bytes_metadata_bitfield() { let buffer = read_header_bytes("tests/fixtures/metadata.bitfield").unwrap(); - let header = Header::from_vec(&buffer).unwrap(); + let header = Header::from_bytes(&buffer).unwrap(); assert!(header.is_bitfield()); assert_eq!(header.to_vec(), buffer); } #[test] -fn from_vec_metadata_signatures() { +fn from_bytes_metadata_signatures() { let buffer = read_header_bytes("tests/fixtures/metadata.signatures").unwrap(); - let header = Header::from_vec(&buffer).unwrap(); + let header = 
Header::from_bytes(&buffer).unwrap(); assert!(header.is_signatures()); assert_eq!(header.to_vec(), buffer); } #[test] -fn from_vec_metadata_tree() { +fn from_bytes_metadata_tree() { let buffer = read_header_bytes("tests/fixtures/metadata.tree").unwrap(); - let header = Header::from_vec(&buffer).unwrap(); + let header = Header::from_bytes(&buffer).unwrap(); assert!(header.is_tree()); assert_eq!(header.to_vec(), buffer); }