diff --git a/Cargo.lock b/Cargo.lock index cd0d63d..10365c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -149,7 +149,7 @@ dependencies = [ [[package]] name = "json5format" -version = "0.2.1" +version = "0.2.3" dependencies = [ "anyhow", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 49eef91..5a29df3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json5format" -version = "0.2.1" +version = "0.2.3" authors = [ "Rich Kadel ", "David Tamas-Parris ", diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4641835596251136 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4641835596251136 new file mode 100644 index 0000000..c9b34af Binary files /dev/null and b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4641835596251136 differ diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 new file mode 100644 index 0000000..c58eee6 --- /dev/null +++ b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 @@ -0,0 +1 @@ +]} \ No newline at end of file diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4993106563956736 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4993106563956736 new file mode 100644 index 0000000..04bb6c5 Binary files /dev/null and b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4993106563956736 differ diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-6541106597199872 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-6541106597199872 new file mode 100644 index 0000000..c7b21cf Binary files /dev/null and b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-6541106597199872 differ diff --git a/src/content.rs b/src/content.rs index e91100a..ad26e00 100644 --- a/src/content.rs +++ b/src/content.rs @@ -30,7 +30,19 @@ impl ParsedDocument { /// If a filename is also provided, any parsing errors will include the filename with the line /// number and column where the error was encountered. pub fn from_str(buffer: &str, filename: Option) -> Result { + Self::from_str_with_nesting_limit(buffer, filename, Parser::DEFAULT_NESTING_LIMIT) + } + + /// Like `from_str()` but also overrides the default nesting limit, used to + /// catch deeply nested JSON5 documents before overflowing the program + /// stack. + pub fn from_str_with_nesting_limit( + buffer: &str, + filename: Option, + nesting_limit: usize, + ) -> Result { let mut parser = Parser::new(&filename); + parser.set_nesting_limit(nesting_limit); let content = parser.parse(&buffer)?; Ok(Self { owned_buffer: None, filename, content }) diff --git a/src/parser.rs b/src/parser.rs index dc1e2fd..0180b47 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -197,6 +197,42 @@ impl Capturer { } } +/// This internal struct holds the information needed to print a +/// contextually-relevant portion of the line (if not the entire line) where a +/// parser error was caught, the first character of the error on that line, and +/// the number of characters from that initial character index (1 or more) to +/// highlight as being part of the error. +struct ParserErrorContext { + /// The error line to be printed with a parser error. + line: String, + + /// The starting character of the error (zero-based index). + indicator_start: usize, + + /// The number of characters to highlight, including the character at the + /// `indicator_start` (at least 1). + indicator_len: usize, +} + +impl ParserErrorContext { + fn new(line: String, indicator_start: usize, indicator_len: usize) -> Self { + assert!(indicator_len >= 1); + Self { line, indicator_start, indicator_len } + } + + fn line(&self) -> &str { + &self.line + } + + fn indicator(&self) -> String { + let mut line = " ".repeat(self.indicator_start) + "^"; + if self.indicator_len > 1 { + line += &"~".repeat(self.indicator_len - 1); + } + line + } +} + pub(crate) struct Parser<'parser> { /// The remaining text in the input buffer since the last capture. remaining: &'parser str, @@ -227,11 +263,18 @@ pub(crate) struct Parser<'parser> { /// and so on. scope_stack: Vec>>, + /// To avoid accidentally overflowing the program stack, limit the number of + /// nested scopes and generate an error if it is exceeded. + nesting_limit: usize, + /// Captures a colon token when expected. colon_capturer: Capturer, } impl<'parser> Parser<'parser> { + /// The default limit of nested scopes when parsing a JSON5 document. + pub const DEFAULT_NESTING_LIMIT: usize = 1000; + pub fn new(filename: &'parser Option) -> Self { let remaining = ""; let current_line = &remaining; @@ -244,11 +287,19 @@ impl<'parser> Parser<'parser> { column_number: 1, next_line_number: 1, next_column_number: 1, - scope_stack: vec![Rc::new(RefCell::new(Array::new(vec![])))], + scope_stack: Vec::default(), + nesting_limit: Self::DEFAULT_NESTING_LIMIT, colon_capturer: Capturer::new(&COLON), } } + /// To avoid accidentally overflowing the program stack, there is a mutable + /// limit on the number of nested scopes allowed. If this limit is exceeded + /// while parsing a document, a parser error is generated. + pub fn set_nesting_limit(&mut self, new_limit: usize) { + self.nesting_limit = new_limit; + } + fn current_scope(&self) -> Rc> { assert!(self.scope_stack.len() > 0); self.scope_stack.last().unwrap().clone() @@ -311,6 +362,12 @@ impl<'parser> Parser<'parser> { self.with_container(|container| container.add_value(value_ref.clone(), self))?; if is_container { self.scope_stack.push(value_ref.clone()); + if self.scope_stack.len() > self.nesting_limit { + return Err(self.error(format!( + "The given JSON5 document exceeds the parser's nesting limit of {}", + self.nesting_limit + ))); + } } Ok(()) } @@ -483,7 +540,11 @@ impl<'parser> Parser<'parser> { fn exit_scope(&mut self) -> Result<(), Error> { self.scope_stack.pop(); - Ok(()) + if self.scope_stack.is_empty() { + Err(self.error("Closing brace without a matching opening brace")) + } else { + Ok(()) + } } fn close_object(&mut self) -> Result<(), Error> { @@ -510,15 +571,14 @@ impl<'parser> Parser<'parser> { } pub fn error(&self, err: impl std::fmt::Display) -> Error { - let mut indicator = " ".repeat(self.column_number - 1) + "^"; - if self.column_number < self.next_column_number - 1 { - indicator += &"~".repeat(if self.line_number == self.next_line_number { - self.next_column_number - self.column_number - 1 - } else { - self.current_line.len() - self.column_number - }); - } - Error::parse(self.location(), format!("{}:\n{}\n{}", err, self.current_line, indicator)) + const MAX_ERROR_LINE_LEN: usize = 200; + const MIN_CONTEXT_LEN: usize = 10; + const ELLIPSIS: &str = "\u{2026}"; + let error_context = self.get_error_context(MAX_ERROR_LINE_LEN, MIN_CONTEXT_LEN, ELLIPSIS); + Error::parse( + self.location(), + format!("{}:\n{}\n{}", err, error_context.line(), error_context.indicator()), + ) } fn consume_if_matched<'a>(&mut self, matched: Option>) -> bool { @@ -563,8 +623,34 @@ impl<'parser> Parser<'parser> { } } + /// Parse the given document string as a JSON5 document containing Array + /// elements (with implicit outer braces). Document locations (use in, for + /// example, error messages), are 1-based and start at line 1, column 1. pub fn parse(&mut self, buffer: &'parser str) -> Result { + self.parse_from_location(buffer, 1, 1) + } + + /// Parse the given document string as a JSON5 document containing Array + /// elements (with implicit outer braces), and use the given 1-based line + /// and column numbers when referring to document locations. + pub fn parse_from_location( + &mut self, + buffer: &'parser str, + starting_line_number: usize, + starting_column_number: usize, + ) -> Result { self.remaining = buffer; + self.current_line = &self.remaining; + + assert!(starting_line_number > 0, "document line numbers are 1-based"); + self.next_line_number = starting_line_number; + self.next_column_number = starting_column_number; + + self.next_line = self.current_line; + self.line_number = self.next_line_number - 1; + self.column_number = self.next_column_number - 1; + self.scope_stack = vec![Rc::new(RefCell::new(Array::new(vec![])))]; + let mut next_token = Capturer::new(&NEXT_TOKEN); let mut single_quoted = Capturer::new(&SINGLE_QUOTED); let mut double_quoted = Capturer::new(&DOUBLE_QUOTED); @@ -677,12 +763,471 @@ impl<'parser> Parser<'parser> { Err(self.error("Mismatched braces in the document")) } } + + /// Returns the given `current_line` and an `indicator` line: spaces, followed + /// by a carat (`^`) that points at the given `column_number`, followed by + /// tilde's (`~`) as long as the error token. + /// + /// If the line is longer than a set maximum length, the line is trimmed and + /// the indicator positions are adjusted. + fn get_error_context( + &self, + max_error_line_len: usize, + min_context_len: usize, + ellipsis: &str, + ) -> ParserErrorContext { + let indicator_len = if self.line_number == self.next_line_number { + std::cmp::max(self.next_column_number - self.column_number, 1) + } else { + 1 + }; + + // `indicator_start` is a 0-based char position + let indicator_start = self.column_number - 1; + + let error_line_len = self.current_line.chars().count(); + if error_line_len <= max_error_line_len { + ParserErrorContext::new(self.current_line.to_owned(), indicator_start, indicator_len) + } else { + trim_error_line_and_indicator( + self.current_line, + indicator_start, + indicator_len, + error_line_len, + max_error_line_len, + min_context_len, + ellipsis, + ) + } + } +} + +struct CharRange { + range: std::ops::Range, +} + +impl CharRange { + fn new(range: std::ops::Range) -> Self { + Self { range } + } + + fn to_byte_range(self, from_string: &str) -> Option> { + let char_len = from_string.chars().count(); + let mut some_start_byte = + if self.range.start == char_len { Some(from_string.len()) } else { None }; + let mut some_end_byte = + if self.range.end == char_len { Some(from_string.len()) } else { None }; + if let (Some(start_byte), Some(end_byte)) = (some_start_byte, some_end_byte) { + return Some(start_byte..end_byte); + } + for (char_pos, (byte_pos, _char)) in from_string.char_indices().enumerate() { + if char_pos == self.range.start { + if let Some(end_byte) = some_end_byte { + return Some(byte_pos..end_byte); + } + some_start_byte = Some(byte_pos); + } + if char_pos == self.range.end { + if let Some(start_byte) = some_start_byte { + return Some(start_byte..byte_pos); + } + some_end_byte = Some(byte_pos); + } + } + None + } +} + +fn trim_error_line_and_indicator( + error_line: &str, + indicator_start: usize, + mut indicator_len: usize, + error_line_len: usize, + max_error_line_len: usize, + min_context_len: usize, + ellipsis: &str, +) -> ParserErrorContext { + let ellipsis_len = ellipsis.chars().count(); + + assert!(max_error_line_len > ellipsis_len); + assert!(max_error_line_len < error_line_len); + assert!(indicator_start <= error_line_len); + assert!(indicator_len == 1 || (indicator_start + indicator_len) <= error_line_len); + + indicator_len = std::cmp::min(indicator_len, max_error_line_len); + + let min_right_context_len = std::cmp::max(min_context_len, indicator_len); + + let context_end = + std::cmp::min(indicator_start + min_right_context_len, error_line_len - ellipsis_len); + if context_end < max_error_line_len - ellipsis_len { + let slice_bytes = CharRange::new(0..(max_error_line_len - ellipsis_len)) + .to_byte_range(error_line) + .expect("char indices should map to String bytes"); + return ParserErrorContext::new( + error_line[slice_bytes].to_string() + ellipsis, + indicator_start, + indicator_len, + ); + } + + let context_start = indicator_start - std::cmp::min(indicator_start, min_context_len); + if error_line_len - context_start < max_error_line_len - ellipsis_len { + let start_char = error_line_len - (max_error_line_len - ellipsis_len); + let slice_bytes = CharRange::new(start_char..error_line_len) + .to_byte_range(error_line) + .expect("char indices should map to String bytes"); + return ParserErrorContext::new( + ellipsis.to_owned() + &error_line[slice_bytes], + (indicator_start + ellipsis_len) - start_char, + indicator_len, + ); + } + + let margin_chars = + max_error_line_len - std::cmp::min(max_error_line_len, (ellipsis_len * 2) + indicator_len); + let right_margin = std::cmp::min( + error_line_len - std::cmp::min(error_line_len, indicator_start + indicator_len), + margin_chars / 2, + ); + let left_margin = margin_chars - right_margin; + let mut start_char = indicator_start - left_margin; + let mut end_char = + std::cmp::min(indicator_start + indicator_len + right_margin, error_line_len); + let mut start_ellipsis = ellipsis; + let mut end_ellipsis = ellipsis; + if start_char == 0 { + start_ellipsis = ""; + end_char += ellipsis_len; + } else if end_char == error_line_len { + end_ellipsis = ""; + start_char -= ellipsis_len; + } + + let slice_bytes = CharRange::new(start_char..end_char) + .to_byte_range(error_line) + .expect("char indices should map to String bytes"); + ParserErrorContext::new( + start_ellipsis.to_owned() + &error_line[slice_bytes] + end_ellipsis, + (indicator_start + ellipsis_len) - start_char, + indicator_len, + ) } #[cfg(test)] mod tests { use {super::*, crate::test_error, proptest::prelude::*}; + fn gen_error_line_test( + error_line: &str, + pattern: &str, + max_error_line_len: usize, + min_context_len: usize, + ellipsis: &str, + expected_errorline: &str, + expected_indicator: &str, + ) -> Result<(), String> { + let some_newline = pattern.find("\n"); + let pattern_line1 = + if let Some(newline) = some_newline { &pattern[0..newline] } else { &pattern }; + assert!(pattern_line1.len() > 0); + let indicator_start = error_line.find(pattern_line1).expect("pattern not found in line"); + let end = indicator_start + pattern.len(); + let indicator_len = end - indicator_start; + let error_context = if error_line.chars().count() <= max_error_line_len { + ParserErrorContext::new(error_line.to_owned(), indicator_start, indicator_len) + } else { + trim_error_line_and_indicator( + error_line, + indicator_start, + indicator_len, + error_line.chars().count(), + max_error_line_len, + min_context_len, + ellipsis, + ) + }; + let actual_errorline = error_context.line(); + let actual_indicator = error_context.indicator(); + let mut errors = String::new(); + if expected_errorline != actual_errorline { + println!( + r#" +expected_errorline: >>>{}<<< (charlen={}) + actual_errorline: >>>{}<<< (charlen={} of {}, min context len={})"#, + expected_errorline, + expected_errorline.chars().count(), + actual_errorline, + actual_errorline.chars().count(), + max_error_line_len, + min_context_len, + ); + errors.push_str("actual errorline does not match expected"); + } else if expected_indicator != actual_indicator { + println!( + r#" + {}"#, + actual_errorline, + ); + } + if expected_indicator != actual_indicator { + if errors.len() > 0 { + errors.push_str(" and "); + } + println!( + r#" +expected_indicator: {} + actual_indicator: {}"#, + expected_indicator, actual_indicator, + ); + errors.push_str("actual indicator does not match expected"); + } else if expected_errorline != actual_errorline { + println!( + r#" + {}"#, + actual_indicator, + ); + } + if errors.len() > 0 { + println!("{}", errors); + Err(errors) + } else { + Ok(()) + } + } + + #[test] + fn test_error_line1() { + gen_error_line_test( + " good token, bad token;", + "bad", + 30, + 10, + " ... ", + " good token, bad token;", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line2() { + gen_error_line_test( + " good token, bad token;", + "token;", + 20, + 10, + " ... ", + " ... ken, bad token;", + " ^~~~~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line2_short_ellipsis() { + gen_error_line_test( + " good token, bad token;", + "token;", + 20, + 10, + "…", + "…d token, bad token;", + " ^~~~~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line3() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 20, + 10, + " ... ", + " ... en, bad to ... ", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line3_short_ellipsis() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 20, + 10, + "…", + "…d token, bad token;", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line3_escaped_unicode_ellipsis() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 20, + 10, + "\u{2026}", + "…d token, bad token;", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 10, + 10, + " ... ", + " ... bad ... ", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4_short_context() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 10, + 5, + " ... ", + " ... bad ... ", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4_long_pattern() { + gen_error_line_test( + "A good token, bad token;", + "bad token", + 10, + 10, + " ... ", + " ... bad token ... ", + " ^~~~~~~~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4_long_pattern_short_context_big_ellipsis() { + gen_error_line_test( + "A good token, bad token;", + "bad token", + 10, + 4, + " ... ", + " ... bad token ... ", + " ^~~~~~~~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4_long_pattern_short_context_short_ellipsis() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 10, + 4, + "\u{2026}", + "…n, bad t…", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line5() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 200, + 10, + " ... ", + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line6() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 10, + " ... ", + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line7() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 5, + " ... ", + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line7_more_braces() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 10, + " ... ", + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line8() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 10, + " ... ", + r#" ... [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line9() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 10, + " ... ", + r#" ... [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + lazy_static! { // With `ProptestConfig::failure_persistence` on by default, tests may generate the // following warnings: @@ -1680,4 +2225,85 @@ mod tests { let object_value = Object::new(vec![]); assert!(object_value.is_object()); } + + #[test] + fn test_document_exceeds_nesting_limit() { + let mut parser = Parser::new(&None); + parser.set_nesting_limit(5); + let good_buffer = r##"{ + list_of_lists_of_lists: [[[]]] +}"##; + parser.parse_from_location(&good_buffer, 8, 15).expect("should NOT exceed nesting limit"); + + let bad_buffer = r##"{ + list_of_lists_of_lists: [[[[]]]] +}"##; + let err = parser + .parse_from_location(&bad_buffer, 8, 15) + .expect_err("should exceed nesting limit"); + match err { + Error::Parse(_, message) => { + assert_eq!( + message, + r##"The given JSON5 document exceeds the parser's nesting limit of 5: + list_of_lists_of_lists: [[[[]]]] + ^"## + ) + } + _ => panic!("expected a parser error"), + } + } + + #[test] + fn test_parse_from_location_error_location() { + let filename = Some("mixed_content.md".to_string()); + let mixed_document = r##" +Mixed Content Doc +================= + +This is a document with embedded JSON5 content. + +```json5 +json5_value = { + // The next line should generate a parser error + 999, +} +``` + +End of mixed content document. +"##; + let json5_slice = + &mixed_document[mixed_document.find("{").unwrap()..mixed_document.find("}").unwrap()]; + let mut parser = Parser::new(&filename); + let err = parser + .parse_from_location(json5_slice, 8, 15) + .expect_err("check error message for location"); + match err { + Error::Parse(Some(loc), message) => { + assert_eq!(loc.file, Some("mixed_content.md".to_owned())); + assert_eq!(loc.line, 10); + assert_eq!(loc.col, 5); + assert_eq!( + message, + r##"Object values require property names: + 999, + ^~~"## + ) + } + _ => panic!("expected a parser error"), + } + } + + #[test] + fn test_doc_with_nulls() { + let mut parser = Parser::new(&None); + let buffer = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[////[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"; + let err = parser.parse(&buffer).expect_err("should fail"); + match err { + Error::Parse(_, message) => { + assert!(message.starts_with("Mismatched braces in the document:")); + } + _ => panic!("expected a parser error"), + } + } } diff --git a/tests/lib.rs b/tests/lib.rs index 87b6f73..29b2832 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -1688,6 +1688,45 @@ fn test_parse_error_block_comment_not_closed() { .unwrap(); } +#[test] +fn test_parse_error_closing_brace_without_opening_brace() { + test_format(FormatTest { + input: r##"]"##, + error: Some( + r#"Parse error: 1:1: Closing brace without a matching opening brace: +] +^"#, + ), + ..Default::default() + }) + .unwrap(); + + test_format(FormatTest { + input: r##" + + ]"##, + error: Some( + r#"Parse error: 3:3: Closing brace without a matching opening brace: + ] + ^"#, + ), + ..Default::default() + }) + .unwrap(); + + test_format(FormatTest { + input: r##" + }"##, + error: Some( + r#"Parse error: 2:5: Invalid Object token found while parsing an Array of 0 items (mismatched braces?): + } + ^"#, + ), + ..Default::default() + }) + .unwrap(); +} + #[test] fn test_multibyte_unicode_chars() { test_format(FormatTest { @@ -1725,12 +1764,21 @@ fn test_multibyte_unicode_chars() { .unwrap(); } -fn visit_dir(dir: &Path, cb: F) -> io::Result<()> +#[test] +fn test_empty_document() { + test_format(FormatTest { options: None, input: "", expected: "", ..Default::default() }) + .unwrap(); +} + +fn visit_dir(dir: &Path, cb: &mut F) -> io::Result<()> where - F: Fn(&DirEntry) -> Result<(), std::io::Error> + Copy, + F: FnMut(&DirEntry) -> Result<(), std::io::Error>, { if !dir.is_dir() { - Err(io::Error::new(io::ErrorKind::Other, "visit_dir called with an invalid path")) + Err(io::Error::new( + io::ErrorKind::Other, + format!("visit_dir called with an invalid path: {:?}", dir), + )) } else { for entry in fs::read_dir(dir)? { let entry = entry?; @@ -1751,26 +1799,52 @@ where /// /// To manually verify test samples, use: /// cargo test test_parsing_samples_does_not_crash -- --nocapture +/// +/// To print the full error message (including the line and pointer to the +/// column), use: +/// JSON5FORMAT_TEST_FULL_ERRORS=1 cargo test test_parsing_samples_does_not_crash -- --nocapture +/// To point to a different samples directory: +/// JSON5FORMAT_TEST_SAMPLES_DIR="/tmp/fuzz_corpus" cargo test test_parsing_samples_does_not_crash #[test] fn test_parsing_samples_does_not_crash() -> Result<(), std::io::Error> { - let mut pathbuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - pathbuf.push("samples"); - visit_dir(pathbuf.as_path(), |entry| { + let mut count = 0; + let pathbuf = if let Some(samples_dir) = option_env!("JSON5FORMAT_TEST_SAMPLES_DIR") { + PathBuf::from(samples_dir) + } else { + let mut manifest_samples = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + manifest_samples.push("samples"); + manifest_samples + }; + visit_dir(pathbuf.as_path(), &mut |entry| { + count += 1; let filename = entry.path().into_os_string().to_string_lossy().to_string(); let mut buffer = String::new(); - fs::File::open(&entry.path())?.read_to_string(&mut buffer)?; + println!("{}. Parsing: {} ...", count, filename); + if let Err(err) = fs::File::open(&entry.path())?.read_to_string(&mut buffer) { + println!("Ignoring failure to read the file into a string: {:?}", err); + return Ok(()); + } let result = ParsedDocument::from_string(buffer, Some(filename.clone())); match result { Ok(_parsed_document) => { - println!("Successfully parsed: {}", filename); + println!(" ... Success"); Ok(()) } - Err(Error::Parse(_, message)) => { - println!( - "Caught input error: {}\n {}", - filename, - message.lines().next().unwrap() - ); + Err(err @ Error::Parse(..)) => { + if option_env!("JSON5FORMAT_TEST_FULL_ERRORS") == Some("1") { + println!(" ... Handled input error:\n{}", err); + } else if let Error::Parse(some_loc, message) = err { + let loc_string = if let Some(loc) = some_loc { + format!(" at {}:{}", loc.line, loc.col) + } else { + "".to_owned() + }; + let mut first_line = message.lines().next().unwrap(); + // strip the colon off the end of the first line of a parser error message + first_line = &first_line[0..first_line.len() - 1]; + println!(" ... Handled input error{}: {}", loc_string, first_line); + } + // It's OK if the input file is bad, as long as the parser fails // gracefully. Ok(())