From e86fecb649d984a3817f1084806874a7815e5a61 Mon Sep 17 00:00:00 2001 From: Rich Kadel Date: Wed, 22 Dec 2021 18:10:08 -0800 Subject: [PATCH 1/2] Limit nesting to prevent program stack overflow, and catch bad brace This PR addresses two more classes of oss-fuzz failures. 1. A document with many open braces, without closing braces, can eventually cause program stack overflow. I've added function variations to set a specific limit, and the existing functions set this value to a default limit. 2. A document that ends with an extra closing brace could crash the parser. This is now caught, and a parser error is emitted. This may have also contributed to an "out-of-memory" error, as the reproducer test (included with this PR) no longer fails. I also added better testing support for debugging the parser on sample documents (such as oss-fuzz reproducers). Fixes: #22 Fixes: #23 Fixes: #24 --- Cargo.lock | 2 +- Cargo.toml | 2 +- ...case-minimized-fuzz_parse-4641835596251136 | Bin 0 -> 174816 bytes ...case-minimized-fuzz_parse-4802677486780416 | 1 + ...case-minimized-fuzz_parse-4993106563956736 | Bin 0 -> 198 bytes ...case-minimized-fuzz_parse-6541106597199872 | Bin 0 -> 1515 bytes src/content.rs | 12 ++ src/parser.rs | 138 +++++++++++++++++- tests/lib.rs | 102 +++++++++++-- 9 files changed, 238 insertions(+), 19 deletions(-) create mode 100644 samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4641835596251136 create mode 100644 samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 create mode 100644 samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4993106563956736 create mode 100644 samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-6541106597199872 diff --git a/Cargo.lock b/Cargo.lock index cd0d63d..d661f48 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -149,7 +149,7 @@ dependencies = [ [[package]] name = "json5format" -version = "0.2.1" +version = "0.2.2" dependencies = [ "anyhow", 
"lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 49eef91..7b3a147 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json5format" -version = "0.2.1" +version = "0.2.2" authors = [ "Rich Kadel ", "David Tamas-Parris ", diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4641835596251136 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4641835596251136 new file mode 100644 index 0000000000000000000000000000000000000000..c9b34afae2f83bd8ad9c045e381139ab67b7c0c9 GIT binary patch literal 174816 zcmeIvp%KC`6b9g6y9p{DgJOzYC&3hfV**qKiUsPs3C9RsfP@|u914-E_+Bm%x%|ny zFUP|!#<Otl~I5CH-N z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!8B22oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF v5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5*BD=_t^OU^#exA}&t@9ymj3in`1 literal 0 HcmV?d00001 diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 new file mode 100644 index 0000000..c58eee6 --- /dev/null +++ 
b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4802677486780416 @@ -0,0 +1 @@ +]} \ No newline at end of file diff --git a/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4993106563956736 b/samples/fuzz_fails_fixed/clusterfuzz-testcase-minimized-fuzz_parse-4993106563956736 new file mode 100644 index 0000000000000000000000000000000000000000..04bb6c554d175e62f6fa34a81a310cc5b6184d16 GIT binary patch literal 198 zcma)#K?;B{3sM3LTBf)ipVN zc0UyFJ6YP-vZ2+Fw=>vtd)b_{SWnKo9l#R6?EvPX(u=%511x}VIVbbvVAA2Tzodt9 nvutkrl`^6R03kF$-SZ2(NEVI%TGI7*l) -> Result { + Self::from_str_with_nesting_limit(buffer, filename, Parser::DEFAULT_NESTING_LIMIT) + } + + /// Like `from_str()` but also overrides the default nesting limit, used to + /// catch deeply nested JSON5 documents before overflowing the program + /// stack. + pub fn from_str_with_nesting_limit( + buffer: &str, + filename: Option, + nesting_limit: usize, + ) -> Result { let mut parser = Parser::new(&filename); + parser.set_nesting_limit(nesting_limit); let content = parser.parse(&buffer)?; Ok(Self { owned_buffer: None, filename, content }) diff --git a/src/parser.rs b/src/parser.rs index dc1e2fd..053f80d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -227,11 +227,18 @@ pub(crate) struct Parser<'parser> { /// and so on. scope_stack: Vec>>, + /// To avoid accidentally overflowing the program stack, limit the number of + /// nested scopes and generate an error if it is exceeded. + nesting_limit: usize, + /// Captures a colon token when expected. colon_capturer: Capturer, } impl<'parser> Parser<'parser> { + /// The default limit of nested scopes when parsing a JSON5 document. 
+ pub const DEFAULT_NESTING_LIMIT: usize = 1000; + pub fn new(filename: &'parser Option) -> Self { let remaining = ""; let current_line = &remaining; @@ -244,11 +251,19 @@ impl<'parser> Parser<'parser> { column_number: 1, next_line_number: 1, next_column_number: 1, - scope_stack: vec![Rc::new(RefCell::new(Array::new(vec![])))], + scope_stack: Vec::default(), + nesting_limit: Self::DEFAULT_NESTING_LIMIT, colon_capturer: Capturer::new(&COLON), } } + /// To avoid accidentally overflowing the program stack, there is a mutable + /// limit on the number of nested scopes allowed. If this limit is exceeded + /// while parsing a document, a parser error is generated. + pub fn set_nesting_limit(&mut self, new_limit: usize) { + self.nesting_limit = new_limit; + } + fn current_scope(&self) -> Rc> { assert!(self.scope_stack.len() > 0); self.scope_stack.last().unwrap().clone() @@ -311,6 +326,12 @@ impl<'parser> Parser<'parser> { self.with_container(|container| container.add_value(value_ref.clone(), self))?; if is_container { self.scope_stack.push(value_ref.clone()); + if self.scope_stack.len() > self.nesting_limit { + return Err(self.error(format!( + "The given JSON5 document exceeds the parser's nesting limit of {}", + self.nesting_limit + ))); + } } Ok(()) } @@ -483,7 +504,11 @@ impl<'parser> Parser<'parser> { fn exit_scope(&mut self) -> Result<(), Error> { self.scope_stack.pop(); - Ok(()) + if self.scope_stack.is_empty() { + Err(self.error("Closing brace without a matching opening brace")) + } else { + Ok(()) + } } fn close_object(&mut self) -> Result<(), Error> { @@ -515,7 +540,7 @@ impl<'parser> Parser<'parser> { indicator += &"~".repeat(if self.line_number == self.next_line_number { self.next_column_number - self.column_number - 1 } else { - self.current_line.len() - self.column_number + 0 }); } Error::parse(self.location(), format!("{}:\n{}\n{}", err, self.current_line, indicator)) @@ -563,8 +588,34 @@ impl<'parser> Parser<'parser> { } } + /// Parse the given document 
string as a JSON5 document containing Array + /// elements (with implicit outer braces). Document locations (used in, for + /// example, error messages), are 1-based and start at line 1, column 1. pub fn parse(&mut self, buffer: &'parser str) -> Result { + self.parse_from_location(buffer, 1, 1) + } + + /// Parse the given document string as a JSON5 document containing Array + /// elements (with implicit outer braces), and use the given 1-based line + /// and column numbers when referring to document locations. + pub fn parse_from_location( + &mut self, + buffer: &'parser str, + starting_line_number: usize, + starting_column_number: usize, + ) -> Result { self.remaining = buffer; + self.current_line = &self.remaining; + + assert!(starting_line_number > 0, "document line numbers are 1-based"); + self.next_line_number = starting_line_number; + self.next_column_number = starting_column_number; + + self.next_line = self.current_line; + self.line_number = self.next_line_number - 1; + self.column_number = self.next_column_number - 1; + self.scope_stack = vec![Rc::new(RefCell::new(Array::new(vec![])))]; + let mut next_token = Capturer::new(&NEXT_TOKEN); let mut single_quoted = Capturer::new(&SINGLE_QUOTED); let mut double_quoted = Capturer::new(&DOUBLE_QUOTED); @@ -1680,4 +1731,85 @@ mod tests { let object_value = Object::new(vec![]); assert!(object_value.is_object()); } + + #[test] + fn test_document_exceeds_nesting_limit() { + let mut parser = Parser::new(&None); + parser.set_nesting_limit(5); + let good_buffer = r##"{ + list_of_lists_of_lists: [[[]]] +}"##; + parser.parse_from_location(&good_buffer, 8, 15).expect("should NOT exceed nesting limit"); + + let bad_buffer = r##"{ + list_of_lists_of_lists: [[[[]]]] +}"##; + let err = parser + .parse_from_location(&bad_buffer, 8, 15) + .expect_err("should exceed nesting limit"); + match err { + Error::Parse(_, message) => { + assert_eq!( + message, + r##"The given JSON5 document exceeds the parser's nesting limit of 5: + 
list_of_lists_of_lists: [[[[]]]] + ^"## + ) + } + _ => panic!("expected a parser error"), + } + } + + #[test] + fn test_parse_from_location_error_location() { + let filename = Some("mixed_content.md".to_string()); + let mixed_document = r##" +Mixed Content Doc +================= + +This is a document with embedded JSON5 content. + +```json5 +json5_value = { + // The next line should generate a parser error + 999, +} +``` + +End of mixed content document. +"##; + let json5_slice = + &mixed_document[mixed_document.find("{").unwrap()..mixed_document.find("}").unwrap()]; + let mut parser = Parser::new(&filename); + let err = parser + .parse_from_location(json5_slice, 8, 15) + .expect_err("check error message for location"); + match err { + Error::Parse(Some(loc), message) => { + assert_eq!(loc.file, Some("mixed_content.md".to_owned())); + assert_eq!(loc.line, 10); + assert_eq!(loc.col, 5); + assert_eq!( + message, + r##"Object values require property names: + 999, + ^~~"## + ) + } + _ => panic!("expected a parser error"), + } + } + + #[test] + fn test_doc_with_nulls() { + let mut parser = Parser::new(&None); + let buffer = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[////[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"; + let err = parser.parse(&buffer).expect_err("should fail"); + match err { + Error::Parse(_, message) => { + assert!(message.starts_with("Mismatched braces in the document:")); + } + _ => panic!("expected a parser error"), + } + } } diff --git a/tests/lib.rs b/tests/lib.rs index 87b6f73..29b2832 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -1688,6 +1688,45 @@ fn test_parse_error_block_comment_not_closed() { .unwrap(); } +#[test] +fn test_parse_error_closing_brace_without_opening_brace() { + test_format(FormatTest { + input: 
r##"]"##, + error: Some( + r#"Parse error: 1:1: Closing brace without a matching opening brace: +] +^"#, + ), + ..Default::default() + }) + .unwrap(); + + test_format(FormatTest { + input: r##" + + ]"##, + error: Some( + r#"Parse error: 3:3: Closing brace without a matching opening brace: + ] + ^"#, + ), + ..Default::default() + }) + .unwrap(); + + test_format(FormatTest { + input: r##" + }"##, + error: Some( + r#"Parse error: 2:5: Invalid Object token found while parsing an Array of 0 items (mismatched braces?): + } + ^"#, + ), + ..Default::default() + }) + .unwrap(); +} + #[test] fn test_multibyte_unicode_chars() { test_format(FormatTest { @@ -1725,12 +1764,21 @@ fn test_multibyte_unicode_chars() { .unwrap(); } -fn visit_dir(dir: &Path, cb: F) -> io::Result<()> +#[test] +fn test_empty_document() { + test_format(FormatTest { options: None, input: "", expected: "", ..Default::default() }) + .unwrap(); +} + +fn visit_dir(dir: &Path, cb: &mut F) -> io::Result<()> where - F: Fn(&DirEntry) -> Result<(), std::io::Error> + Copy, + F: FnMut(&DirEntry) -> Result<(), std::io::Error>, { if !dir.is_dir() { - Err(io::Error::new(io::ErrorKind::Other, "visit_dir called with an invalid path")) + Err(io::Error::new( + io::ErrorKind::Other, + format!("visit_dir called with an invalid path: {:?}", dir), + )) } else { for entry in fs::read_dir(dir)? 
{ let entry = entry?; @@ -1751,26 +1799,52 @@ where /// /// To manually verify test samples, use: /// cargo test test_parsing_samples_does_not_crash -- --nocapture +/// +/// To print the full error message (including the line and pointer to the +/// column), use: +/// JSON5FORMAT_TEST_FULL_ERRORS=1 cargo test test_parsing_samples_does_not_crash -- --nocapture +/// To point to a different samples directory: +/// JSON5FORMAT_TEST_SAMPLES_DIR="/tmp/fuzz_corpus" cargo test test_parsing_samples_does_not_crash #[test] fn test_parsing_samples_does_not_crash() -> Result<(), std::io::Error> { - let mut pathbuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - pathbuf.push("samples"); - visit_dir(pathbuf.as_path(), |entry| { + let mut count = 0; + let pathbuf = if let Some(samples_dir) = option_env!("JSON5FORMAT_TEST_SAMPLES_DIR") { + PathBuf::from(samples_dir) + } else { + let mut manifest_samples = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + manifest_samples.push("samples"); + manifest_samples + }; + visit_dir(pathbuf.as_path(), &mut |entry| { + count += 1; let filename = entry.path().into_os_string().to_string_lossy().to_string(); let mut buffer = String::new(); - fs::File::open(&entry.path())?.read_to_string(&mut buffer)?; + println!("{}. Parsing: {} ...", count, filename); + if let Err(err) = fs::File::open(&entry.path())?.read_to_string(&mut buffer) { + println!("Ignoring failure to read the file into a string: {:?}", err); + return Ok(()); + } let result = ParsedDocument::from_string(buffer, Some(filename.clone())); match result { Ok(_parsed_document) => { - println!("Successfully parsed: {}", filename); + println!(" ... Success"); Ok(()) } - Err(Error::Parse(_, message)) => { - println!( - "Caught input error: {}\n {}", - filename, - message.lines().next().unwrap() - ); + Err(err @ Error::Parse(..)) => { + if option_env!("JSON5FORMAT_TEST_FULL_ERRORS") == Some("1") { + println!(" ... 
Handled input error:\n{}", err); + } else if let Error::Parse(some_loc, message) = err { + let loc_string = if let Some(loc) = some_loc { + format!(" at {}:{}", loc.line, loc.col) + } else { + "".to_owned() + }; + let mut first_line = message.lines().next().unwrap(); + // strip the colon off the end of the first line of a parser error message + first_line = &first_line[0..first_line.len() - 1]; + println!(" ... Handled input error{}: {}", loc_string, first_line); + } + // It's OK if the input file is bad, as long as the parser fails // gracefully. Ok(()) From 3bf19bf7e4d3b01ce67a4c8307164e101b894913 Mon Sep 17 00:00:00 2001 From: Rich Kadel Date: Tue, 28 Dec 2021 08:16:24 -0800 Subject: [PATCH 2/2] Improve error message content if on long lines Improved display of error messages that need to highlight an error on an exceptionally long line (like thousands of opening braces). The error message generator now trims long lines, and still, hopefully always, shows an indicator where the error is on that line. 
Fixes: #26 --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/parser.rs | 512 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 505 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d661f48..10365c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -149,7 +149,7 @@ dependencies = [ [[package]] name = "json5format" -version = "0.2.2" +version = "0.2.3" dependencies = [ "anyhow", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index 7b3a147..5a29df3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json5format" -version = "0.2.2" +version = "0.2.3" authors = [ "Rich Kadel ", "David Tamas-Parris ", diff --git a/src/parser.rs b/src/parser.rs index 053f80d..0180b47 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -197,6 +197,42 @@ impl Capturer { } } +/// This internal struct holds the information needed to print a +/// contextually-relevant portion of the line (if not the entire line) where a +/// parser error was caught, the first character of the error on that line, and +/// the number of characters from that initial character index (1 or more) to +/// highlight as being part of the error. +struct ParserErrorContext { + /// The error line to be printed with a parser error. + line: String, + + /// The starting character of the error (zero-based index). + indicator_start: usize, + + /// The number of characters to highlight, including the character at the + /// `indicator_start` (at least 1). 
+ indicator_len: usize, +} + +impl ParserErrorContext { + fn new(line: String, indicator_start: usize, indicator_len: usize) -> Self { + assert!(indicator_len >= 1); + Self { line, indicator_start, indicator_len } + } + + fn line(&self) -> &str { + &self.line + } + + fn indicator(&self) -> String { + let mut line = " ".repeat(self.indicator_start) + "^"; + if self.indicator_len > 1 { + line += &"~".repeat(self.indicator_len - 1); + } + line + } +} + pub(crate) struct Parser<'parser> { /// The remaining text in the input buffer since the last capture. remaining: &'parser str, @@ -535,15 +571,14 @@ impl<'parser> Parser<'parser> { } pub fn error(&self, err: impl std::fmt::Display) -> Error { - let mut indicator = " ".repeat(self.column_number - 1) + "^"; - if self.column_number < self.next_column_number - 1 { - indicator += &"~".repeat(if self.line_number == self.next_line_number { - self.next_column_number - self.column_number - 1 - } else { - 0 - }); - } - Error::parse(self.location(), format!("{}:\n{}\n{}", err, self.current_line, indicator)) + const MAX_ERROR_LINE_LEN: usize = 200; + const MIN_CONTEXT_LEN: usize = 10; + const ELLIPSIS: &str = "\u{2026}"; + let error_context = self.get_error_context(MAX_ERROR_LINE_LEN, MIN_CONTEXT_LEN, ELLIPSIS); + Error::parse( + self.location(), + format!("{}:\n{}\n{}", err, error_context.line(), error_context.indicator()), + ) } fn consume_if_matched<'a>(&mut self, matched: Option>) -> bool { @@ -728,12 +763,471 @@ impl<'parser> Parser<'parser> { Err(self.error("Mismatched braces in the document")) } } + + /// Returns the given `current_line` and an `indicator` line: spaces, followed + /// by a caret (`^`) that points at the given `column_number`, followed by + /// tildes (`~`) as long as the error token. + /// + /// If the line is longer than a set maximum length, the line is trimmed and + /// the indicator positions are adjusted. 
+ fn get_error_context( + &self, + max_error_line_len: usize, + min_context_len: usize, + ellipsis: &str, + ) -> ParserErrorContext { + let indicator_len = if self.line_number == self.next_line_number { + std::cmp::max(self.next_column_number - self.column_number, 1) + } else { + 1 + }; + + // `indicator_start` is a 0-based char position + let indicator_start = self.column_number - 1; + + let error_line_len = self.current_line.chars().count(); + if error_line_len <= max_error_line_len { + ParserErrorContext::new(self.current_line.to_owned(), indicator_start, indicator_len) + } else { + trim_error_line_and_indicator( + self.current_line, + indicator_start, + indicator_len, + error_line_len, + max_error_line_len, + min_context_len, + ellipsis, + ) + } + } +} + +struct CharRange { + range: std::ops::Range, +} + +impl CharRange { + fn new(range: std::ops::Range) -> Self { + Self { range } + } + + fn to_byte_range(self, from_string: &str) -> Option> { + let char_len = from_string.chars().count(); + let mut some_start_byte = + if self.range.start == char_len { Some(from_string.len()) } else { None }; + let mut some_end_byte = + if self.range.end == char_len { Some(from_string.len()) } else { None }; + if let (Some(start_byte), Some(end_byte)) = (some_start_byte, some_end_byte) { + return Some(start_byte..end_byte); + } + for (char_pos, (byte_pos, _char)) in from_string.char_indices().enumerate() { + if char_pos == self.range.start { + if let Some(end_byte) = some_end_byte { + return Some(byte_pos..end_byte); + } + some_start_byte = Some(byte_pos); + } + if char_pos == self.range.end { + if let Some(start_byte) = some_start_byte { + return Some(start_byte..byte_pos); + } + some_end_byte = Some(byte_pos); + } + } + None + } +} + +fn trim_error_line_and_indicator( + error_line: &str, + indicator_start: usize, + mut indicator_len: usize, + error_line_len: usize, + max_error_line_len: usize, + min_context_len: usize, + ellipsis: &str, +) -> ParserErrorContext { + let 
ellipsis_len = ellipsis.chars().count(); + + assert!(max_error_line_len > ellipsis_len); + assert!(max_error_line_len < error_line_len); + assert!(indicator_start <= error_line_len); + assert!(indicator_len == 1 || (indicator_start + indicator_len) <= error_line_len); + + indicator_len = std::cmp::min(indicator_len, max_error_line_len); + + let min_right_context_len = std::cmp::max(min_context_len, indicator_len); + + let context_end = + std::cmp::min(indicator_start + min_right_context_len, error_line_len - ellipsis_len); + if context_end < max_error_line_len - ellipsis_len { + let slice_bytes = CharRange::new(0..(max_error_line_len - ellipsis_len)) + .to_byte_range(error_line) + .expect("char indices should map to String bytes"); + return ParserErrorContext::new( + error_line[slice_bytes].to_string() + ellipsis, + indicator_start, + indicator_len, + ); + } + + let context_start = indicator_start - std::cmp::min(indicator_start, min_context_len); + if error_line_len - context_start < max_error_line_len - ellipsis_len { + let start_char = error_line_len - (max_error_line_len - ellipsis_len); + let slice_bytes = CharRange::new(start_char..error_line_len) + .to_byte_range(error_line) + .expect("char indices should map to String bytes"); + return ParserErrorContext::new( + ellipsis.to_owned() + &error_line[slice_bytes], + (indicator_start + ellipsis_len) - start_char, + indicator_len, + ); + } + + let margin_chars = + max_error_line_len - std::cmp::min(max_error_line_len, (ellipsis_len * 2) + indicator_len); + let right_margin = std::cmp::min( + error_line_len - std::cmp::min(error_line_len, indicator_start + indicator_len), + margin_chars / 2, + ); + let left_margin = margin_chars - right_margin; + let mut start_char = indicator_start - left_margin; + let mut end_char = + std::cmp::min(indicator_start + indicator_len + right_margin, error_line_len); + let mut start_ellipsis = ellipsis; + let mut end_ellipsis = ellipsis; + if start_char == 0 { + start_ellipsis = ""; + 
end_char += ellipsis_len; + } else if end_char == error_line_len { + end_ellipsis = ""; + start_char -= ellipsis_len; + } + + let slice_bytes = CharRange::new(start_char..end_char) + .to_byte_range(error_line) + .expect("char indices should map to String bytes"); + ParserErrorContext::new( + start_ellipsis.to_owned() + &error_line[slice_bytes] + end_ellipsis, + (indicator_start + ellipsis_len) - start_char, + indicator_len, + ) } #[cfg(test)] mod tests { use {super::*, crate::test_error, proptest::prelude::*}; + fn gen_error_line_test( + error_line: &str, + pattern: &str, + max_error_line_len: usize, + min_context_len: usize, + ellipsis: &str, + expected_errorline: &str, + expected_indicator: &str, + ) -> Result<(), String> { + let some_newline = pattern.find("\n"); + let pattern_line1 = + if let Some(newline) = some_newline { &pattern[0..newline] } else { &pattern }; + assert!(pattern_line1.len() > 0); + let indicator_start = error_line.find(pattern_line1).expect("pattern not found in line"); + let end = indicator_start + pattern.len(); + let indicator_len = end - indicator_start; + let error_context = if error_line.chars().count() <= max_error_line_len { + ParserErrorContext::new(error_line.to_owned(), indicator_start, indicator_len) + } else { + trim_error_line_and_indicator( + error_line, + indicator_start, + indicator_len, + error_line.chars().count(), + max_error_line_len, + min_context_len, + ellipsis, + ) + }; + let actual_errorline = error_context.line(); + let actual_indicator = error_context.indicator(); + let mut errors = String::new(); + if expected_errorline != actual_errorline { + println!( + r#" +expected_errorline: >>>{}<<< (charlen={}) + actual_errorline: >>>{}<<< (charlen={} of {}, min context len={})"#, + expected_errorline, + expected_errorline.chars().count(), + actual_errorline, + actual_errorline.chars().count(), + max_error_line_len, + min_context_len, + ); + errors.push_str("actual errorline does not match expected"); + } else if 
expected_indicator != actual_indicator { + println!( + r#" + {}"#, + actual_errorline, + ); + } + if expected_indicator != actual_indicator { + if errors.len() > 0 { + errors.push_str(" and "); + } + println!( + r#" +expected_indicator: {} + actual_indicator: {}"#, + expected_indicator, actual_indicator, + ); + errors.push_str("actual indicator does not match expected"); + } else if expected_errorline != actual_errorline { + println!( + r#" + {}"#, + actual_indicator, + ); + } + if errors.len() > 0 { + println!("{}", errors); + Err(errors) + } else { + Ok(()) + } + } + + #[test] + fn test_error_line1() { + gen_error_line_test( + " good token, bad token;", + "bad", + 30, + 10, + " ... ", + " good token, bad token;", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line2() { + gen_error_line_test( + " good token, bad token;", + "token;", + 20, + 10, + " ... ", + " ... ken, bad token;", + " ^~~~~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line2_short_ellipsis() { + gen_error_line_test( + " good token, bad token;", + "token;", + 20, + 10, + "…", + "…d token, bad token;", + " ^~~~~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line3() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 20, + 10, + " ... ", + " ... en, bad to ... 
", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line3_short_ellipsis() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 20, + 10, + "…", + "…d token, bad token;", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line3_escaped_unicode_ellipsis() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 20, + 10, + "\u{2026}", + "…d token, bad token;", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 10, + 10, + " ... ", + " ... bad ... ", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4_short_context() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 10, + 5, + " ... ", + " ... bad ... ", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4_long_pattern() { + gen_error_line_test( + "A good token, bad token;", + "bad token", + 10, + 10, + " ... ", + " ... bad token ... ", + " ^~~~~~~~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4_long_pattern_short_context_big_ellipsis() { + gen_error_line_test( + "A good token, bad token;", + "bad token", + 10, + 4, + " ... ", + " ... bad token ... ", + " ^~~~~~~~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line4_long_pattern_short_context_short_ellipsis() { + gen_error_line_test( + "A good token, bad token;", + "bad", + 10, + 4, + "\u{2026}", + "…n, bad t…", + " ^~~", + ) + .expect("actual should match expected"); + } + + #[test] + fn test_error_line5() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 200, + 10, + " ... 
", + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line6() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 10, + " ... ", + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line7() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 5, + " ... ", + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line7_more_braces() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 10, + " ... ", + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line8() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 10, + " ... ", + r#" ... 
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + + #[test] + fn test_error_line9() { + gen_error_line_test( + r#"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"#, + "a_prop", + 100, + 10, + " ... ", + r#" ... [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[{ not a_prop: "value" }]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] ... "#, + r#" ^~~~~~"#, + ).expect("actual should match expected"); + } + lazy_static! { // With `ProptestConfig::failure_persistence` on by default, tests may generate the // following warnings: