From 912a0506355998343ce65ae6381f7deb5e20505b Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 24 Jun 2023 11:37:17 -0700 Subject: [PATCH 1/3] Add test of parsing whitespace --- tests/test.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test.rs b/tests/test.rs index 42b6317..bea47ea 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -694,6 +694,15 @@ fn check_spans_internal(ts: TokenStream, lines: &mut &[(usize, usize, usize, usi } } +#[test] +fn whitespace() { + // space, horizontal tab, vertical tab, form feed, carriage return, line + // feed, non-breaking space, left-to-right mark, right-to-left mark + let various_spaces = " \t\u{b}\u{c}\r\n\u{a0}\u{200e}\u{200f}"; + let tokens = various_spaces.parse::<TokenStream>().unwrap(); + assert_eq!(tokens.into_iter().count(), 0); +} + #[test] fn byte_order_mark() { let string = "\u{feff}foo"; From 8c99c83ca7beaf478103ed621c4fd45414412e56 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 24 Jun 2023 11:38:15 -0700 Subject: [PATCH 2/3] Add test of \r not followed by \n --- tests/test.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test.rs b/tests/test.rs index bea47ea..2cbc707 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -701,6 +701,9 @@ fn whitespace() { let various_spaces = " \t\u{b}\u{c}\r\n\u{a0}\u{200e}\u{200f}"; let tokens = various_spaces.parse::<TokenStream>().unwrap(); assert_eq!(tokens.into_iter().count(), 0); + + let lone_carriage_return = " \r "; + lone_carriage_return.parse::<TokenStream>().unwrap(); // FIXME } #[test] From 0c550b285eeccedf57e4310469827fa6964a1ba2 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 24 Jun 2023 11:38:37 -0700 Subject: [PATCH 3/3] Require \r whitespace to be followed by \n --- src/parse.rs | 6 +++++- tests/test.rs | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/parse.rs b/src/parse.rs index d63f734..4269765 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -104,10 +104,14 @@ fn skip_whitespace(input: Cursor) -> Cursor { } } match byte { - b' ' 
| 0x09..=0x0d => { + b' ' | 0x09..=0x0c => { s = s.advance(1); continue; } + b'\r' if s.as_bytes().get(1) == Some(&b'\n') => { + s = s.advance(2); + continue; + } b if b <= 0x7f => {} _ => { let ch = s.chars().next().unwrap(); diff --git a/tests/test.rs b/tests/test.rs index 2cbc707..58331e9 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -703,7 +703,7 @@ fn whitespace() { assert_eq!(tokens.into_iter().count(), 0); let lone_carriage_return = " \r "; - lone_carriage_return.parse::<TokenStream>().unwrap(); // FIXME + lone_carriage_return.parse::<TokenStream>().unwrap_err(); } #[test]