Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "json5format"
version = "0.2.1"
version = "0.2.2"
authors = [
"Rich Kadel <richkadel@google.com>",
"David Tamas-Parris <davidatp@google.com>",
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
]}
Binary file not shown.
Binary file not shown.
12 changes: 12 additions & 0 deletions src/content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,19 @@ impl ParsedDocument {
/// If a filename is also provided, any parsing errors will include the filename with the line
/// number and column where the error was encountered.
pub fn from_str(buffer: &str, filename: Option<String>) -> Result<Self, Error> {
Self::from_str_with_nesting_limit(buffer, filename, Parser::DEFAULT_NESTING_LIMIT)
}

/// Like `from_str()` but also overrides the default nesting limit, used to
/// catch deeply nested JSON5 documents before overflowing the program
/// stack.
pub fn from_str_with_nesting_limit(
buffer: &str,
filename: Option<String>,
nesting_limit: usize,
) -> Result<Self, Error> {
let mut parser = Parser::new(&filename);
parser.set_nesting_limit(nesting_limit);
let content = parser.parse(&buffer)?;

Ok(Self { owned_buffer: None, filename, content })
Expand Down
138 changes: 135 additions & 3 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,18 @@ pub(crate) struct Parser<'parser> {
/// and so on.
scope_stack: Vec<Rc<RefCell<Value>>>,

/// To avoid accidentally overflowing the program stack, limit the number of
/// nested scopes and generate an error if it is exceeded.
nesting_limit: usize,

/// Captures a colon token when expected.
colon_capturer: Capturer,
}

impl<'parser> Parser<'parser> {
/// The default limit of nested scopes when parsing a JSON5 document.
pub const DEFAULT_NESTING_LIMIT: usize = 1000;

pub fn new(filename: &'parser Option<String>) -> Self {
let remaining = "";
let current_line = &remaining;
Expand All @@ -244,11 +251,19 @@ impl<'parser> Parser<'parser> {
column_number: 1,
next_line_number: 1,
next_column_number: 1,
scope_stack: vec![Rc::new(RefCell::new(Array::new(vec![])))],
scope_stack: Vec::default(),
nesting_limit: Self::DEFAULT_NESTING_LIMIT,
colon_capturer: Capturer::new(&COLON),
}
}

/// To avoid accidentally overflowing the program stack, there is a mutable
/// limit on the number of nested scopes allowed. If this limit is exceeded
/// while parsing a document, a parser error is generated.
pub fn set_nesting_limit(&mut self, new_limit: usize) {
self.nesting_limit = new_limit;
}

fn current_scope(&self) -> Rc<RefCell<Value>> {
assert!(self.scope_stack.len() > 0);
self.scope_stack.last().unwrap().clone()
Expand Down Expand Up @@ -311,6 +326,12 @@ impl<'parser> Parser<'parser> {
self.with_container(|container| container.add_value(value_ref.clone(), self))?;
if is_container {
self.scope_stack.push(value_ref.clone());
if self.scope_stack.len() > self.nesting_limit {
return Err(self.error(format!(
"The given JSON5 document exceeds the parser's nesting limit of {}",
self.nesting_limit
)));
}
}
Ok(())
}
Expand Down Expand Up @@ -483,7 +504,11 @@ impl<'parser> Parser<'parser> {

fn exit_scope(&mut self) -> Result<(), Error> {
self.scope_stack.pop();
Ok(())
if self.scope_stack.is_empty() {
Err(self.error("Closing brace without a matching opening brace"))
} else {
Ok(())
}
}

fn close_object(&mut self) -> Result<(), Error> {
Expand Down Expand Up @@ -515,7 +540,7 @@ impl<'parser> Parser<'parser> {
indicator += &"~".repeat(if self.line_number == self.next_line_number {
self.next_column_number - self.column_number - 1
} else {
self.current_line.len() - self.column_number
0
});
}
Error::parse(self.location(), format!("{}:\n{}\n{}", err, self.current_line, indicator))
Expand Down Expand Up @@ -563,8 +588,34 @@ impl<'parser> Parser<'parser> {
}
}

/// Parse the given document string as a JSON5 document containing Array
/// elements (with implicit outer braces). Document locations (use in, for
/// example, error messages), are 1-based and start at line 1, column 1.
pub fn parse(&mut self, buffer: &'parser str) -> Result<Array, Error> {
self.parse_from_location(buffer, 1, 1)
}

/// Parse the given document string as a JSON5 document containing Array
/// elements (with implicit outer braces), and use the given 1-based line
/// and column numbers when referring to document locations.
pub fn parse_from_location(
&mut self,
buffer: &'parser str,
starting_line_number: usize,
starting_column_number: usize,
) -> Result<Array, Error> {
self.remaining = buffer;
self.current_line = &self.remaining;

assert!(starting_line_number > 0, "document line numbers are 1-based");
self.next_line_number = starting_line_number;
self.next_column_number = starting_column_number;

self.next_line = self.current_line;
self.line_number = self.next_line_number - 1;
self.column_number = self.next_column_number - 1;
self.scope_stack = vec![Rc::new(RefCell::new(Array::new(vec![])))];

let mut next_token = Capturer::new(&NEXT_TOKEN);
let mut single_quoted = Capturer::new(&SINGLE_QUOTED);
let mut double_quoted = Capturer::new(&DOUBLE_QUOTED);
Expand Down Expand Up @@ -1680,4 +1731,85 @@ mod tests {
let object_value = Object::new(vec![]);
assert!(object_value.is_object());
}

#[test]
fn test_document_exceeds_nesting_limit() {
let mut parser = Parser::new(&None);
parser.set_nesting_limit(5);
let good_buffer = r##"{
list_of_lists_of_lists: [[[]]]
}"##;
parser.parse_from_location(&good_buffer, 8, 15).expect("should NOT exceed nesting limit");

let bad_buffer = r##"{
list_of_lists_of_lists: [[[[]]]]
}"##;
let err = parser
.parse_from_location(&bad_buffer, 8, 15)
.expect_err("should exceed nesting limit");
match err {
Error::Parse(_, message) => {
assert_eq!(
message,
r##"The given JSON5 document exceeds the parser's nesting limit of 5:
list_of_lists_of_lists: [[[[]]]]
^"##
)
}
_ => panic!("expected a parser error"),
}
}

#[test]
fn test_parse_from_location_error_location() {
let filename = Some("mixed_content.md".to_string());
let mixed_document = r##"
Mixed Content Doc
=================

This is a document with embedded JSON5 content.

```json5
json5_value = {
// The next line should generate a parser error
999,
}
```

End of mixed content document.
"##;
let json5_slice =
&mixed_document[mixed_document.find("{").unwrap()..mixed_document.find("}").unwrap()];
let mut parser = Parser::new(&filename);
let err = parser
.parse_from_location(json5_slice, 8, 15)
.expect_err("check error message for location");
match err {
Error::Parse(Some(loc), message) => {
assert_eq!(loc.file, Some("mixed_content.md".to_owned()));
assert_eq!(loc.line, 10);
assert_eq!(loc.col, 5);
assert_eq!(
message,
r##"Object values require property names:
999,
^~~"##
)
}
_ => panic!("expected a parser error"),
}
}

#[test]
fn test_doc_with_nulls() {
let mut parser = Parser::new(&None);
let buffer = "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[////[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}\u{000}]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]";
let err = parser.parse(&buffer).expect_err("should fail");
match err {
Error::Parse(_, message) => {
assert!(message.starts_with("Mismatched braces in the document:"));
}
_ => panic!("expected a parser error"),
}
}
}
102 changes: 88 additions & 14 deletions tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1688,6 +1688,45 @@ fn test_parse_error_block_comment_not_closed() {
.unwrap();
}

#[test]
fn test_parse_error_closing_brace_without_opening_brace() {
test_format(FormatTest {
input: r##"]"##,
error: Some(
r#"Parse error: 1:1: Closing brace without a matching opening brace:
]
^"#,
),
..Default::default()
})
.unwrap();

test_format(FormatTest {
input: r##"

]"##,
error: Some(
r#"Parse error: 3:3: Closing brace without a matching opening brace:
]
^"#,
),
..Default::default()
})
.unwrap();

test_format(FormatTest {
input: r##"
}"##,
error: Some(
r#"Parse error: 2:5: Invalid Object token found while parsing an Array of 0 items (mismatched braces?):
}
^"#,
),
..Default::default()
})
.unwrap();
}

#[test]
fn test_multibyte_unicode_chars() {
test_format(FormatTest {
Expand Down Expand Up @@ -1725,12 +1764,21 @@ fn test_multibyte_unicode_chars() {
.unwrap();
}

fn visit_dir<F>(dir: &Path, cb: F) -> io::Result<()>
#[test]
fn test_empty_document() {
test_format(FormatTest { options: None, input: "", expected: "", ..Default::default() })
.unwrap();
}

fn visit_dir<F>(dir: &Path, cb: &mut F) -> io::Result<()>
where
F: Fn(&DirEntry) -> Result<(), std::io::Error> + Copy,
F: FnMut(&DirEntry) -> Result<(), std::io::Error>,
{
if !dir.is_dir() {
Err(io::Error::new(io::ErrorKind::Other, "visit_dir called with an invalid path"))
Err(io::Error::new(
io::ErrorKind::Other,
format!("visit_dir called with an invalid path: {:?}", dir),
))
} else {
for entry in fs::read_dir(dir)? {
let entry = entry?;
Expand All @@ -1751,26 +1799,52 @@ where
///
/// To manually verify test samples, use:
/// cargo test test_parsing_samples_does_not_crash -- --nocapture
///
/// To print the full error message (including the line and pointer to the
/// column), use:
/// JSON5FORMAT_TEST_FULL_ERRORS=1 cargo test test_parsing_samples_does_not_crash -- --nocapture
/// To point to a different samples directory:
/// JSON5FORMAT_TEST_SAMPLES_DIR="/tmp/fuzz_corpus" cargo test test_parsing_samples_does_not_crash
#[test]
fn test_parsing_samples_does_not_crash() -> Result<(), std::io::Error> {
let mut pathbuf = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
pathbuf.push("samples");
visit_dir(pathbuf.as_path(), |entry| {
let mut count = 0;
let pathbuf = if let Some(samples_dir) = option_env!("JSON5FORMAT_TEST_SAMPLES_DIR") {
PathBuf::from(samples_dir)
} else {
let mut manifest_samples = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
manifest_samples.push("samples");
manifest_samples
};
visit_dir(pathbuf.as_path(), &mut |entry| {
count += 1;
let filename = entry.path().into_os_string().to_string_lossy().to_string();
let mut buffer = String::new();
fs::File::open(&entry.path())?.read_to_string(&mut buffer)?;
println!("{}. Parsing: {} ...", count, filename);
if let Err(err) = fs::File::open(&entry.path())?.read_to_string(&mut buffer) {
println!("Ignoring failure to read the file into a string: {:?}", err);
return Ok(());
}
let result = ParsedDocument::from_string(buffer, Some(filename.clone()));
match result {
Ok(_parsed_document) => {
println!("Successfully parsed: {}", filename);
println!(" ... Success");
Ok(())
}
Err(Error::Parse(_, message)) => {
println!(
"Caught input error: {}\n {}",
filename,
message.lines().next().unwrap()
);
Err(err @ Error::Parse(..)) => {
if option_env!("JSON5FORMAT_TEST_FULL_ERRORS") == Some("1") {
println!(" ... Handled input error:\n{}", err);
} else if let Error::Parse(some_loc, message) = err {
let loc_string = if let Some(loc) = some_loc {
format!(" at {}:{}", loc.line, loc.col)
} else {
"".to_owned()
};
let mut first_line = message.lines().next().unwrap();
// strip the colon off the end of the first line of a parser error message
first_line = &first_line[0..first_line.len() - 1];
println!(" ... Handled input error{}: {}", loc_string, first_line);
}

// It's OK if the input file is bad, as long as the parser fails
// gracefully.
Ok(())
Expand Down