From 4891ea7d4c4b47095d2c1524f1267fc53efbdb11 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Tue, 6 Jun 2023 07:54:37 +0200 Subject: [PATCH 1/5] Remove reference in match --- src/format/parse.rs | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/format/parse.rs b/src/format/parse.rs index f1737d7dd5..10a30e9dfb 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -379,28 +379,28 @@ where Item::Fixed(ref spec) => { use super::Fixed::*; - match spec { - &ShortMonthName => { + match *spec { + ShortMonthName => { let month0 = try_consume!(scan::short_month0(s)); parsed.set_month(i64::from(month0) + 1).map_err(|e| (s, e))?; } - &LongMonthName => { + LongMonthName => { let month0 = try_consume!(scan::short_or_long_month0(s)); parsed.set_month(i64::from(month0) + 1).map_err(|e| (s, e))?; } - &ShortWeekdayName => { + ShortWeekdayName => { let weekday = try_consume!(scan::short_weekday(s)); parsed.set_weekday(weekday).map_err(|e| (s, e))?; } - &LongWeekdayName => { + LongWeekdayName => { let weekday = try_consume!(scan::short_or_long_weekday(s)); parsed.set_weekday(weekday).map_err(|e| (s, e))?; } - &LowerAmPm | &UpperAmPm => { + LowerAmPm | UpperAmPm => { if s.len() < 2 { return Err((s, TOO_SHORT)); } @@ -413,14 +413,14 @@ where s = &s[2..]; } - &Nanosecond | &Nanosecond3 | &Nanosecond6 | &Nanosecond9 => { + Nanosecond | Nanosecond3 | Nanosecond6 | Nanosecond9 => { if s.starts_with('.') { let nano = try_consume!(scan::nanosecond(&s[1..])); parsed.set_nanosecond(nano).map_err(|e| (s, e))?; } } - &Internal(InternalFixed { val: InternalInternal::Nanosecond3NoDot }) => { + Internal(InternalFixed { val: InternalInternal::Nanosecond3NoDot }) => { if s.len() < 3 { return Err((s, TOO_SHORT)); } @@ -428,7 +428,7 @@ where parsed.set_nanosecond(nano).map_err(|e| (s, e))?; } - &Internal(InternalFixed { val: InternalInternal::Nanosecond6NoDot }) => { + Internal(InternalFixed { val: 
InternalInternal::Nanosecond6NoDot }) => { if s.len() < 6 { return Err((s, TOO_SHORT)); } @@ -436,7 +436,7 @@ where parsed.set_nanosecond(nano).map_err(|e| (s, e))?; } - &Internal(InternalFixed { val: InternalInternal::Nanosecond9NoDot }) => { + Internal(InternalFixed { val: InternalInternal::Nanosecond9NoDot }) => { if s.len() < 9 { return Err((s, TOO_SHORT)); } @@ -444,14 +444,14 @@ where parsed.set_nanosecond(nano).map_err(|e| (s, e))?; } - &TimezoneName => { + TimezoneName => { try_consume!(scan::timezone_name_skip(s)); } - &TimezoneOffsetColon - | &TimezoneOffsetDoubleColon - | &TimezoneOffsetTripleColon - | &TimezoneOffset => { + TimezoneOffsetColon + | TimezoneOffsetDoubleColon + | TimezoneOffsetTripleColon + | TimezoneOffset => { let offset = try_consume!(scan::timezone_offset( s.trim_start(), scan::colon_or_space @@ -459,16 +459,14 @@ where parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?; } - &TimezoneOffsetColonZ | &TimezoneOffsetZ => { + TimezoneOffsetColonZ | TimezoneOffsetZ => { let offset = try_consume!(scan::timezone_offset_zulu( s.trim_start(), scan::colon_or_space )); parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?; } - &Internal(InternalFixed { - val: InternalInternal::TimezoneOffsetPermissive, - }) => { + Internal(InternalFixed { val: InternalInternal::TimezoneOffsetPermissive }) => { let offset = try_consume!(scan::timezone_offset_permissive( s.trim_start(), scan::colon_or_space @@ -476,8 +474,8 @@ where parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?; } - &RFC2822 => try_consume!(parse_rfc2822(parsed, s)), - &RFC3339 => try_consume!(parse_rfc3339(parsed, s)), + RFC2822 => try_consume!(parse_rfc2822(parsed, s)), + RFC3339 => try_consume!(parse_rfc3339(parsed, s)), } } From 11c24919b66944f1c0dda49dd34edfc8513085f2 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Tue, 6 Jun 2023 09:03:07 +0200 Subject: [PATCH 2/5] Be slightly more strict around whitespace parsing --- src/format/mod.rs | 3 ++- src/format/parse.rs | 10 
+++++++++- src/format/strftime.rs | 7 +++++-- src/naive/date.rs | 2 +- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/format/mod.rs b/src/format/mod.rs index 7c9c3b7dd0..7e0ba6dc2e 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -293,7 +293,8 @@ pub enum Item<'a> { #[cfg(any(feature = "alloc", feature = "std", test))] #[cfg_attr(docsrs, doc(cfg(any(feature = "alloc", feature = "std"))))] OwnedLiteral(Box<str>), - /// Whitespace. Prints literally but reads zero or more whitespace. + /// Whitespace. Prints as a literal but reads one or more Unicode whitespaces. + /// If the string literal is "" this acts as an optional whitespace. Space(&'a str), /// Same as `Space` but with the string owned by the item. #[cfg(any(feature = "alloc", feature = "std", test))] diff --git a/src/format/parse.rs b/src/format/parse.rs index 10a30e9dfb..2eec7869c3 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -320,7 +320,15 @@ where s = &s[prefix.len()..]; } - Item::Space(_) => { + Item::Space(chars) => { + if !chars.is_empty() { + // match at least one character + match s.chars().next() { + None => return Err((s, TOO_SHORT)), + Some(c) if !c.is_whitespace() => return Err((s, INVALID)), + Some(c) => s = &s[c.len_utf8()..], + } + } s = s.trim_start(); } diff --git a/src/format/strftime.rs b/src/format/strftime.rs index 9dae6e7e09..b29f1757fc 100644 --- a/src/format/strftime.rs +++ b/src/format/strftime.rs @@ -82,8 +82,8 @@ The following specifiers are available both to formatting and parsing. | `%s` | `994518299` | UNIX timestamp, the number of seconds since 1970-01-01 00:00 UTC. [^6]| | | | | | | | **SPECIAL SPECIFIERS:** | -| `%t` | | Literal tab (`\t`). | -| `%n` | | Literal newline (`\n`). | +| `%t` | | Literal tab (`\t`), accepts any Unicode whitespace when parsing. | +| `%n` | | Literal newline (`\n`), accepts any Unicode whitespace when parsing. | | `%%` | | Literal percent sign.
| It is possible to override the default padding behavior of numeric specifiers `%?`. @@ -97,6 +97,9 @@ Modifier | Description Notes: +One or more Unicode whitespace characters are considered one 'space'-item. When formatting it will +be inserted as a string literal. When parsing it will match one or more whitespaces. + [^1]: `%C`, `%y`: This is floor division, so 100 BCE (year number -99) will print `-1` and `99` respectively. diff --git a/src/naive/date.rs b/src/naive/date.rs index f34d855a20..a048a3e65c 100644 --- a/src/naive/date.rs +++ b/src/naive/date.rs @@ -3017,7 +3017,7 @@ mod tests { Ok(ymd(2014, 5, 7)) ); // ignore time and offset assert_eq!( - NaiveDate::parse_from_str("2015-W06-1=2015-033", "%G-W%V-%u = %Y-%j"), + NaiveDate::parse_from_str("2015-W06-1 = 2015-033", "%G-W%V-%u = %Y-%j"), Ok(ymd(2015, 2, 2)) ); assert_eq!( From 35f61d608579bd11028f357f3485d6f621c5aca2 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Tue, 6 Jun 2023 09:56:13 +0200 Subject: [PATCH 3/5] Add formatting specifier for optional space --- src/format/strftime.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/format/strftime.rs b/src/format/strftime.rs index b29f1757fc..21e1bb11a3 100644 --- a/src/format/strftime.rs +++ b/src/format/strftime.rs @@ -423,6 +423,7 @@ impl<'a> Iterator for StrftimeItems<'a> { fix!(TimezoneOffset) } } + ' ' => sp!(""), '+' => fix!(RFC3339), ':' => { if self.remainder.starts_with("::z") { @@ -549,7 +550,7 @@ mod tests { assert_eq!(parse_and_collect("%%%%"), [lit!("%"), lit!("%")]); assert_eq!(parse_and_collect("foo%?"), [Item::Error]); assert_eq!(parse_and_collect("bar%42"), [Item::Error]); - assert_eq!(parse_and_collect("quux% +"), [Item::Error]); + assert_eq!(parse_and_collect("quux%"), [Item::Error]); assert_eq!(parse_and_collect("%.Z"), [Item::Error]); assert_eq!(parse_and_collect("%:Z"), [Item::Error]); assert_eq!(parse_and_collect("%-Z"), [Item::Error]); @@ -568,6 +569,9 @@ mod tests { assert_eq!(parse_and_collect("%z"),
[fix!(TimezoneOffset)]); assert_eq!(parse_and_collect("%#z"), [internal_fix!(TimezoneOffsetPermissive)]); assert_eq!(parse_and_collect("%#m"), [Item::Error]); + assert_eq!(parse_and_collect("%t"), [sp!("\t")]); + assert_eq!(parse_and_collect("%n"), [sp!("\n")]); + assert_eq!(parse_and_collect("% "), [sp!("")]); } #[test] From 414f61b686efa672b3d53a97f7c04a32a22cfc11 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Tue, 6 Jun 2023 10:28:50 +0200 Subject: [PATCH 4/5] Format optional space as ' ' --- src/format/mod.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/format/mod.rs b/src/format/mod.rs index 7e0ba6dc2e..f21eda6fc7 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -513,9 +513,24 @@ fn format_inner( let locale = Locales::new(locale); match *item { - Item::Literal(s) | Item::Space(s) => result.push_str(s), + Item::Literal(s) => result.push_str(s), #[cfg(any(feature = "alloc", feature = "std", test))] - Item::OwnedLiteral(ref s) | Item::OwnedSpace(ref s) => result.push_str(s), + Item::OwnedLiteral(ref s) => result.push_str(s), + Item::Space(s) => { + if s.is_empty() { + result.push(' ') // print an optional space + } else { + result.push_str(s) + } + } + #[cfg(any(feature = "alloc", feature = "std", test))] + Item::OwnedSpace(ref s) => { + if s.is_empty() { + result.push(' ') // print an optional space + } else { + result.push_str(s) + } + } Item::Numeric(ref spec, ref pad) => { use self::Numeric::*; From c17ae8018d69cdcb4d825182426cb05f28c20698 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Tue, 6 Jun 2023 10:29:09 +0200 Subject: [PATCH 5/5] Make space in `%r` optional --- src/format/strftime.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/format/strftime.rs b/src/format/strftime.rs index 21e1bb11a3..45529afa80 100644 --- a/src/format/strftime.rs +++ b/src/format/strftime.rs @@ -404,7 +404,7 @@ impl<'a> Iterator for StrftimeItems<'a> { num0!(Minute), lit!(":"), num0!(Second), - 
sp!(" "), + sp!(""), fix!(UpperAmPm) ], 's' => num!(Timestamp),