Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Be slightly more strict around whitespace parsing #1130

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 19 additions & 3 deletions src/format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ pub enum Item<'a> {
#[cfg(any(feature = "alloc", feature = "std", test))]
#[cfg_attr(docsrs, doc(cfg(any(feature = "alloc", feature = "std"))))]
OwnedLiteral(Box<str>),
/// Whitespace. Prints literally but reads zero or more whitespace.
/// Whitespace. Prints as a literal but reads one or more Unicode whitespaces.
/// If the string literal is "" this acts as an optional whitespace.
Space(&'a str),
/// Same as `Space` but with the string owned by the item.
#[cfg(any(feature = "alloc", feature = "std", test))]
Expand Down Expand Up @@ -512,9 +513,24 @@ fn format_inner(
let locale = Locales::new(locale);

match *item {
Item::Literal(s) | Item::Space(s) => result.push_str(s),
Item::Literal(s) => result.push_str(s),
#[cfg(any(feature = "alloc", feature = "std", test))]
Item::OwnedLiteral(ref s) | Item::OwnedSpace(ref s) => result.push_str(s),
Item::OwnedLiteral(ref s) => result.push_str(s),
Item::Space(s) => {
if s.is_empty() {
result.push(' ') // print an optional space
} else {
result.push_str(s)
}
}
#[cfg(any(feature = "alloc", feature = "std", test))]
Item::OwnedSpace(ref s) => {
if s.is_empty() {
result.push(' ') // print an optional space
} else {
result.push_str(s)
}
}

Item::Numeric(ref spec, ref pad) => {
use self::Numeric::*;
Expand Down
50 changes: 28 additions & 22 deletions src/format/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,15 @@ where
s = &s[prefix.len()..];
}

Item::Space(_) => {
Item::Space(chars) => {
if !chars.is_empty() {
// match at least one character
match s.chars().next() {
None => return Err((s, TOO_SHORT)),
Some(c) if !c.is_whitespace() => return Err((s, INVALID)),
Some(c) => s = &s[c.len_utf8()..],
}
}
s = s.trim_start();
}

Expand Down Expand Up @@ -379,28 +387,28 @@ where
Item::Fixed(ref spec) => {
use super::Fixed::*;

match spec {
&ShortMonthName => {
match *spec {
ShortMonthName => {
let month0 = try_consume!(scan::short_month0(s));
parsed.set_month(i64::from(month0) + 1).map_err(|e| (s, e))?;
}

&LongMonthName => {
LongMonthName => {
let month0 = try_consume!(scan::short_or_long_month0(s));
parsed.set_month(i64::from(month0) + 1).map_err(|e| (s, e))?;
}

&ShortWeekdayName => {
ShortWeekdayName => {
let weekday = try_consume!(scan::short_weekday(s));
parsed.set_weekday(weekday).map_err(|e| (s, e))?;
}

&LongWeekdayName => {
LongWeekdayName => {
let weekday = try_consume!(scan::short_or_long_weekday(s));
parsed.set_weekday(weekday).map_err(|e| (s, e))?;
}

&LowerAmPm | &UpperAmPm => {
LowerAmPm | UpperAmPm => {
if s.len() < 2 {
return Err((s, TOO_SHORT));
}
Expand All @@ -413,71 +421,69 @@ where
s = &s[2..];
}

&Nanosecond | &Nanosecond3 | &Nanosecond6 | &Nanosecond9 => {
Nanosecond | Nanosecond3 | Nanosecond6 | Nanosecond9 => {
if s.starts_with('.') {
let nano = try_consume!(scan::nanosecond(&s[1..]));
parsed.set_nanosecond(nano).map_err(|e| (s, e))?;
}
}

&Internal(InternalFixed { val: InternalInternal::Nanosecond3NoDot }) => {
Internal(InternalFixed { val: InternalInternal::Nanosecond3NoDot }) => {
if s.len() < 3 {
return Err((s, TOO_SHORT));
}
let nano = try_consume!(scan::nanosecond_fixed(s, 3));
parsed.set_nanosecond(nano).map_err(|e| (s, e))?;
}

&Internal(InternalFixed { val: InternalInternal::Nanosecond6NoDot }) => {
Internal(InternalFixed { val: InternalInternal::Nanosecond6NoDot }) => {
if s.len() < 6 {
return Err((s, TOO_SHORT));
}
let nano = try_consume!(scan::nanosecond_fixed(s, 6));
parsed.set_nanosecond(nano).map_err(|e| (s, e))?;
}

&Internal(InternalFixed { val: InternalInternal::Nanosecond9NoDot }) => {
Internal(InternalFixed { val: InternalInternal::Nanosecond9NoDot }) => {
if s.len() < 9 {
return Err((s, TOO_SHORT));
}
let nano = try_consume!(scan::nanosecond_fixed(s, 9));
parsed.set_nanosecond(nano).map_err(|e| (s, e))?;
}

&TimezoneName => {
TimezoneName => {
try_consume!(scan::timezone_name_skip(s));
}

&TimezoneOffsetColon
| &TimezoneOffsetDoubleColon
| &TimezoneOffsetTripleColon
| &TimezoneOffset => {
TimezoneOffsetColon
| TimezoneOffsetDoubleColon
| TimezoneOffsetTripleColon
| TimezoneOffset => {
let offset = try_consume!(scan::timezone_offset(
s.trim_start(),
scan::colon_or_space
));
parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?;
}

&TimezoneOffsetColonZ | &TimezoneOffsetZ => {
TimezoneOffsetColonZ | TimezoneOffsetZ => {
let offset = try_consume!(scan::timezone_offset_zulu(
s.trim_start(),
scan::colon_or_space
));
parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?;
}
&Internal(InternalFixed {
val: InternalInternal::TimezoneOffsetPermissive,
}) => {
Internal(InternalFixed { val: InternalInternal::TimezoneOffsetPermissive }) => {
let offset = try_consume!(scan::timezone_offset_permissive(
s.trim_start(),
scan::colon_or_space
));
parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?;
}

&RFC2822 => try_consume!(parse_rfc2822(parsed, s)),
&RFC3339 => try_consume!(parse_rfc3339(parsed, s)),
RFC2822 => try_consume!(parse_rfc2822(parsed, s)),
RFC3339 => try_consume!(parse_rfc3339(parsed, s)),
}
}

Expand Down
15 changes: 11 additions & 4 deletions src/format/strftime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ The following specifiers are available both to formatting and parsing.
| `%s` | `994518299` | UNIX timestamp, the number of seconds since 1970-01-01 00:00 UTC. [^6]|
| | | |
| | | **SPECIAL SPECIFIERS:** |
| `%t` | | Literal tab (`\t`). |
| `%n` | | Literal newline (`\n`). |
| `%t` | | Literal tab (`\t`), accepts any Unicode whitespace when parsing. |
| `%n` | | Literal newline (`\n`), accepts any Unicode whitespace when parsing. |
| `%%` | | Literal percent sign. |

It is possible to override the default padding behavior of numeric specifiers `%?`.
Expand All @@ -97,6 +97,9 @@ Modifier | Description

Notes:

One or more Unicode whitespace characters are considered one 'space'-item. When formatting it will
be inserted as a string literal. When parsing it will match one or more whitespace characters.

[^1]: `%C`, `%y`:
This is floor division, so 100 BCE (year number -99) will print `-1` and `99` respectively.

Expand Down Expand Up @@ -401,7 +404,7 @@ impl<'a> Iterator for StrftimeItems<'a> {
num0!(Minute),
lit!(":"),
num0!(Second),
sp!(" "),
sp!(""),
fix!(UpperAmPm)
],
's' => num!(Timestamp),
Expand All @@ -420,6 +423,7 @@ impl<'a> Iterator for StrftimeItems<'a> {
fix!(TimezoneOffset)
}
}
' ' => sp!(""),
'+' => fix!(RFC3339),
':' => {
if self.remainder.starts_with("::z") {
Expand Down Expand Up @@ -546,7 +550,7 @@ mod tests {
assert_eq!(parse_and_collect("%%%%"), [lit!("%"), lit!("%")]);
assert_eq!(parse_and_collect("foo%?"), [Item::Error]);
assert_eq!(parse_and_collect("bar%42"), [Item::Error]);
assert_eq!(parse_and_collect("quux% +"), [Item::Error]);
assert_eq!(parse_and_collect("quux%"), [Item::Error]);
assert_eq!(parse_and_collect("%.Z"), [Item::Error]);
assert_eq!(parse_and_collect("%:Z"), [Item::Error]);
assert_eq!(parse_and_collect("%-Z"), [Item::Error]);
Expand All @@ -565,6 +569,9 @@ mod tests {
assert_eq!(parse_and_collect("%z"), [fix!(TimezoneOffset)]);
assert_eq!(parse_and_collect("%#z"), [internal_fix!(TimezoneOffsetPermissive)]);
assert_eq!(parse_and_collect("%#m"), [Item::Error]);
assert_eq!(parse_and_collect("%t"), [sp!("\t")]);
assert_eq!(parse_and_collect("%n"), [sp!("\n")]);
assert_eq!(parse_and_collect("% "), [sp!("")]);
}

#[test]
Expand Down
2 changes: 1 addition & 1 deletion src/naive/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3017,7 +3017,7 @@ mod tests {
Ok(ymd(2014, 5, 7))
); // ignore time and offset
assert_eq!(
NaiveDate::parse_from_str("2015-W06-1=2015-033", "%G-W%V-%u = %Y-%j"),
NaiveDate::parse_from_str("2015-W06-1 = 2015-033", "%G-W%V-%u = %Y-%j"),
Ok(ymd(2015, 2, 2))
);
assert_eq!(
Expand Down