diff --git a/Cargo.lock b/Cargo.lock index cd0d63d..d661f48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -149,7 +149,7 @@ dependencies = [ [[package]] name = "json5format" -version = "0.2.1" +version = "0.2.2" dependencies = [ "anyhow", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 49eef91..7b3a147 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json5format" -version = "0.2.1" +version = "0.2.2" authors = [ "Rich Kadel ", "David Tamas-Parris ", diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4641835596251136 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4641835596251136 new file mode 100644 index 0000000..c9b34af Binary files /dev/null and b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4641835596251136 differ diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 new file mode 100644 index 0000000..c58eee6 --- /dev/null +++ b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 @@ -0,0 +1 @@ +]} \ No newline at end of file diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4993106563956736 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4993106563956736 new file mode 100644 index 0000000..04bb6c5 Binary files /dev/null and b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4993106563956736 differ diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-6541106597199872 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-6541106597199872 new file mode 100644 index 0000000..c7b21cf Binary files /dev/null and b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-6541106597199872 differ diff --git a/src/content.rs b/src/content.rs index e91100a..ad26e00 100644 --- a/src/content.rs +++ b/src/content.rs @@ -30,7 +30,19 @@ impl ParsedDocument { /// If a filename is also provided, any parsing errors will include the filename with the line /// number and column where the error was encountered. pub fn from_str(buffer: &str, filename: Option) -> Result { + Self::from_str_with_nesting_limit(buffer, filename, Parser::DEFAULT_NESTING_LIMIT) + } + + /// Like `from_str()` but also overrides the default nesting limit, used to + /// catch deeply nested JSON5 documents before overflowing the program + /// stack. + pub fn from_str_with_nesting_limit( + buffer: &str, + filename: Option, + nesting_limit: usize, + ) -> Result { let mut parser = Parser::new(&filename); + parser.set_nesting_limit(nesting_limit); let content = parser.parse(&buffer)?; Ok(Self { owned_buffer: None, filename, content }) diff --git a/src/parser.rs b/src/parser.rs index dc1e2fd..053f80d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -227,11 +227,18 @@ pub(crate) struct Parser<'parser> { /// and so on. scope_stack: Vec>>, + /// To avoid accidentally overflowing the program stack, limit the number of + /// nested scopes and generate an error if it is exceeded. + nesting_limit: usize, + /// Captures a colon token when expected. colon_capturer: Capturer, } impl<'parser> Parser<'parser> { + /// The default limit of nested scopes when parsing a JSON5 document. + pub const DEFAULT_NESTING_LIMIT: usize = 1000; + pub fn new(filename: &'parser Option) -> Self { let remaining = ""; let current_line = &remaining; @@ -244,11 +251,19 @@ impl<'parser> Parser<'parser> { column_number: 1, next_line_number: 1, next_column_number: 1, - scope_stack: vec![Rc::new(RefCell::new(Array::new(vec![])))], + scope_stack: Vec::default(), + nesting_limit: Self::DEFAULT_NESTING_LIMIT, colon_capturer: Capturer::new(&COLON), } } + /// To avoid accidentally overflowing the program stack, there is a mutable + /// limit on the number of nested scopes allowed. If this limit is exceeded + /// while parsing a document, a parser error is generated. + pub fn set_nesting_limit(&mut self, new_limit: usize) { + self.nesting_limit = new_limit; + } + fn current_scope(&self) -> Rc> { assert!(self.scope_stack.len() > 0); self.scope_stack.last().unwrap().clone() @@ -311,6 +326,12 @@ impl<'parser> Parser<'parser> { self.with_container(|container| container.add_value(value_ref.clone(), self))?; if is_container { self.scope_stack.push(value_ref.clone()); + if self.scope_stack.len() > self.nesting_limit { + return Err(self.error(format!( + "The given JSON5 document exceeds the parser's nesting limit of {}", + self.nesting_limit + ))); + } } Ok(()) } @@ -483,7 +504,11 @@ impl<'parser> Parser<'parser> { fn exit_scope(&mut self) -> Result<(), Error> { self.scope_stack.pop(); - Ok(()) + if self.scope_stack.is_empty() { + Err(self.error("Closing brace without a matching opening brace")) + } else { + Ok(()) + } } fn close_object(&mut self) -> Result<(), Error> { @@ -515,7 +540,7 @@ impl<'parser> Parser<'parser> { indicator += &"~".repeat(if self.line_number == self.next_line_number { self.next_column_number - self.column_number - 1 } else { - self.current_line.len() - self.column_number + 0 }); } Error::parse(self.location(), format!("{}:\n{}\n{}", err, self.current_line, indicator)) @@ -563,8 +588,34 @@ impl<'parser> Parser<'parser> { } } + /// Parse the given document string as a JSON5 document containing Array + /// elements (with implicit outer braces). Document locations (use in, for + /// example, error messages), are 1-based and start at line 1, column 1. pub fn parse(&mut self, buffer: &'parser str) -> Result { + self.parse_from_location(buffer, 1, 1) + } + + /// Parse the given document string as a JSON5 document containing Array + /// elements (with implicit outer braces), and use the given 1-based line + /// and column numbers when referring to document locations. + pub fn parse_from_location( + &mut self, + buffer: &'parser str, + starting_line_number: usize, + starting_column_number: usize, + ) -> Result { self.remaining = buffer; + self.current_line = &self.remaining; + + assert!(starting_line_number > 0, "document line numbers are 1-based"); + self.next_line_number = starting_line_number; + self.next_column_number = starting_column_number; + + self.next_line = self.current_line; + self.line_number = self.next_line_number - 1; + self.column_number = self.next_column_number - 1; + self.scope_stack = vec![Rc::new(RefCell::new(Array::new(vec![])))]; + let mut next_token = Capturer::new(&NEXT_TOKEN); let mut single_quoted = Capturer::new(&SINGLE_QUOTED); let mut double_quoted = Capturer::new(&DOUBLE_QUOTED); @@ -1680,4 +1731,85 @@ mod tests { let object_value = Object::new(vec![]); assert!(object_value.is_object()); } + + #[test] + fn test_document_exceeds_nesting_limit() { + let mut parser = Parser::new(&None); + parser.set_nesting_limit(5); + let good_buffer = r##"{ + list_of_lists_of_lists: [[[]]] +}"##; + parser.parse_from_location(&good_buffer, 8, 15).expect("should NOT exceed nesting limit"); + + let bad_buffer = r##"{ + list_of_lists_of_lists: [[[[]]]] +}"##; + let err = parser + .parse_from_location(&bad_buffer, 8, 15) + .expect_err("should exceed nesting limit"); + match err { + Error::Parse(_, message) => { + assert_eq!( + message, + r##"The given JSON5 document exceeds the parser's nesting limit of 5: + list_of_lists_of_lists: [[[[]]]] + ^"## + ) + } + _ => panic!("expected a parser error"), + } + } + + #[test] + fn test_parse_from_location_error_location() { + let filename = Some("mixed_content.md".to_string()); + let mixed_document = r##" +Mixed Content Doc +================= + +This is a document with embedded JSON5 content. + +```json5 +json5_value = { + // The next line should generate a parser error + 999, +} +``` + +End of mixed content document. +"##; + let json5_slice = + &mixed_document[mixed_document.find("{").unwrap()..mixed_document.find("}").unwrap()]; + let mut parser = Parser::new(&filename); + let err = parser + .parse_from_location(json5_slice, 8, 15) + .expect_err("check error message for location"); + match err { + Error::Parse(Some(loc), message) => { + assert_eq!(loc.file, Some("mixed_content.md".to_owned())); + assert_eq!(loc.line, 10); + assert_eq!(loc.col, 5); + assert_eq!( + message, + r##"Object values require property names: + 999, + ^~~"## + ) + } + _ => panic!("expected a parser error"), + } + } + + #[test] + fn test_doc_with_nulls() { + let mut parser = Parser::new(&None); + let buffer = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[////[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"; + let err = parser.parse(&buffer).expect_err("should fail"); + match err { + Error::Parse(_, message) => { + assert!(message.starts_with("Mismatched braces in the document:")); + } + _ => panic!("expected a parser error"), + } + } } diff --git a/tests/lib.rs b/tests/lib.rs index 87b6f73..29b2832 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -1688,6 +1688,45 @@ fn test_parse_error_block_comment_not_closed() { .unwrap(); } +#[test] +fn test_parse_error_closing_brace_without_opening_brace() { + test_format(FormatTest { + input: r##"]"##, + error: Some( + r#"Parse error: 1:1: Closing brace without a matching opening brace: +] +^"#, + ), + ..Default::default() + }) + .unwrap(); + + test_format(FormatTest { + input: r##" + + ]"##, + error: Some( + r#"Parse error: 3:3: Closing brace without a matching opening brace: + ] + ^"#, + ), + ..Default::default() + }) + .unwrap(); + + test_format(FormatTest { + input: r##" + }"##, + error: Some( + r#"Parse error: 2:5: Invalid Object token found while parsing an Array of 0 items (mismatched braces?): + } + ^"#, + ), + ..Default::default() + }) + .unwrap(); +} + #[test] fn test_multibyte_unicode_chars() { test_format(FormatTest { @@ -1725,12 +1764,21 @@ fn test_multibyte_unicode_chars() { .unwrap(); } -fn visit_dir(dir: &Path, cb: F) -> io::Result<()> +#[test] +fn test_empty_document() { + test_format(FormatTest { options: None, input: "", expected: "", ..Default::default() }) + .unwrap(); +} + +fn visit_dir(dir: &Path, cb: &mut F) -> io::Result<()> where - F: Fn(&DirEntry) -> Result<(), std::io::Error> + Copy, + F: FnMut(&DirEntry) -> Result<(), std::io::Error>, { if !dir.is_dir() { - Err(io::Error::new(io::ErrorKind::Other, "visit_dir called with an invalid path")) + Err(io::Error::new( + io::ErrorKind::Other, + format!("visit_dir called with an invalid path: {:?}", dir), + )) } else { for entry in fs::read_dir(dir)? { let entry = entry?; @@ -1751,26 +1799,52 @@ where /// /// To manually verify test samples, use: /// cargo test test_parsing_samples_does_not_crash -- --nocapture +/// +/// To print the full error message (including the line and pointer to the +/// column), use: +/// JSON5FORMAT_TEST_FULL_ERRORS=1 cargo test test_parsing_samples_does_not_crash -- --nocapture +/// To point to a different samples directory: +/// JSON5FORMAT_TEST_SAMPLES_DIR="/tmp/fuzz_corpus" cargo test test_parsing_samples_does_not_crash #[test] fn test_parsing_samples_does_not_crash() -> Result<(), std::io::Error> { - let mut pathbuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - pathbuf.push("samples"); - visit_dir(pathbuf.as_path(), |entry| { + let mut count = 0; + let pathbuf = if let Some(samples_dir) = option_env!("JSON5FORMAT_TEST_SAMPLES_DIR") { + PathBuf::from(samples_dir) + } else { + let mut manifest_samples = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + manifest_samples.push("samples"); + manifest_samples + }; + visit_dir(pathbuf.as_path(), &mut |entry| { + count += 1; let filename = entry.path().into_os_string().to_string_lossy().to_string(); let mut buffer = String::new(); - fs::File::open(&entry.path())?.read_to_string(&mut buffer)?; + println!("{}. Parsing: {} ...", count, filename); + if let Err(err) = fs::File::open(&entry.path())?.read_to_string(&mut buffer) { + println!("Ignoring failure to read the file into a string: {:?}", err); + return Ok(()); + } let result = ParsedDocument::from_string(buffer, Some(filename.clone())); match result { Ok(_parsed_document) => { - println!("Successfully parsed: {}", filename); + println!(" ... Success"); Ok(()) } - Err(Error::Parse(_, message)) => { - println!( - "Caught input error: {}\n {}", - filename, - message.lines().next().unwrap() - ); + Err(err @ Error::Parse(..)) => { + if option_env!("JSON5FORMAT_TEST_FULL_ERRORS") == Some("1") { + println!(" ... Handled input error:\n{}", err); + } else if let Error::Parse(some_loc, message) = err { + let loc_string = if let Some(loc) = some_loc { + format!(" at {}:{}", loc.line, loc.col) + } else { + "".to_owned() + }; + let mut first_line = message.lines().next().unwrap(); + // strip the colon off the end of the first line of a parser error message + first_line = &first_line[0..first_line.len() - 1]; + println!(" ... Handled input error{}: {}", loc_string, first_line); + } + // It's OK if the input file is bad, as long as the parser fails // gracefully. Ok(())