From 742fb955a935e2faf3ccf30e751bfb28e33cf839 Mon Sep 17 00:00:00 2001 From: jtmoon79 <815261+jtmoon79@users.noreply.github.com> Date: Sat, 10 Sep 2022 00:14:59 -0700 Subject: [PATCH] Exact whitespace and timezone colon parsing Be exact about allowed whitespace around and between data and formats for all parsing, except for the RFC 2822 format, which explicitly allows arbitrary whitespace. For timezone colons dividing hours and minutes, limit variations to `":"`, `" "`, `" :"`, `": "`, or `" : "`. Previously chrono allowed unlimited whitespace and colons to divide a timezone, e.g. the unusual string `"09: : : ::: :00"` could be a valid timezone (it shouldn't be). Issue #660 --- src/datetime/tests.rs | 65 +++++----- src/format/mod.rs | 12 +- src/format/parse.rs | 242 +++++++++++++++++++++--------------- src/format/scan.rs | 137 +++++++++++++++++++- src/format/strftime.rs | 53 +++++--- src/naive/date.rs | 48 +++---- src/naive/datetime/tests.rs | 27 +++- src/naive/time/mod.rs | 3 - src/naive/time/tests.rs | 21 +++- 9 files changed, 415 insertions(+), 193 deletions(-) diff --git a/src/datetime/tests.rs b/src/datetime/tests.rs index ebfc5c4a08..2f6f6ff8cf 100644 --- a/src/datetime/tests.rs +++ b/src/datetime/tests.rs @@ -259,18 +259,6 @@ fn test_parse_datetime_utc() { let valid = [ "2001-02-03T04:05:06Z", "2012-12-12T12:12:12Z", - "2012 -12-12T12:12:12Z", - "2012 -12-12T12:12:12Z", - "2012- 12-12T12:12:12Z", - "2012- 12-12T12:12:12Z", - "2012-12-12T 12:12:12Z", - "2012-12-12T12 :12:12Z", - "2012-12-12T12 :12:12Z", - "2012-12-12T12: 12:12Z", - "2012-12-12T12: 12:12Z", - "2012-12-12T12 : 12:12Z", - "2012-12-12T12:12:12Z ", - " 2012-12-12T12:12:12Z", "2015-02-18T23:16:09.153Z", "2015-2-18T23:16:09.153Z", "+2015-2-18T23:16:09.153Z", @@ -327,6 +315,18 @@ fn test_parse_datetime_utc() { "2012-12-12t12:12:12Z", // wrong divider 't' "+802701-12-12T12:12:12Z", // invalid year (out of bounds) "+ 2012-12-12T12:12:12Z", // invalid space before year + "2012 -12-12T12:12:12Z", // space after 
year + "2012 -12-12T12:12:12Z", // multi space after year + "2012- 12-12T12:12:12Z", // space after year divider + "2012- 12-12T12:12:12Z", // multi space after year divider + "2012-12-12T 12:12:12Z", // space after date-time divider + "2012-12-12T12 :12:12Z", // space after hour + "2012-12-12T12 :12:12Z", // multi space after hour + "2012-12-12T12: 12:12Z", // space before minute + "2012-12-12T12: 12:12Z", // multi space before minute + "2012-12-12T12 : 12:12Z", // space before and after hour-minute divider + "2012-12-12T12:12:12Z ", // trailing space + " 2012-12-12T12:12:12Z", // leading space " +82701 - 05 - 6 T 15 : 9 : 60.898989898989 Z", // valid datetime, wrong format ]; for &s in &invalid { @@ -395,12 +395,23 @@ fn test_utc_datetime_from_str_with_spaces() { assert_eq!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S"), Ok(dt),); assert_eq!(Utc.datetime_from_str("Aug 09 2013\t23:54:35", "%b %d %Y\t%H:%M:%S"), Ok(dt),); assert_eq!(Utc.datetime_from_str("Aug 09 2013\t\t23:54:35", "%b %d %Y\t\t%H:%M:%S"), Ok(dt),); - assert_eq!(Utc.datetime_from_str("Aug 09 2013 23:54:35 ", "%b %d %Y %H:%M:%S\n"), Ok(dt),); - assert_eq!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y\t%H:%M:%S"), Ok(dt),); - assert_eq!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S "), Ok(dt),); - assert_eq!(Utc.datetime_from_str("Aug 09 2013 23:54:35", " %b %d %Y %H:%M:%S"), Ok(dt),); - assert_eq!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S\n"), Ok(dt),); // with varying spaces - should fail + // leading whitespace in format + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", " %b %d %Y %H:%M:%S").is_err()); + // trailing whitespace in format + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S ").is_err()); + // extra mid-string whitespace in format + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S").is_err()); + // mismatched leading whitespace + 
assert!(Utc.datetime_from_str("\tAug 09 2013 23:54:35", "\n%b %d %Y %H:%M:%S").is_err()); + // mismatched trailing whitespace + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35 ", "%b %d %Y %H:%M:%S\n").is_err()); + // mismatched mid-string whitespace + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y\t%H:%M:%S").is_err()); + // trailing whitespace in format + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S ").is_err()); + // trailing whitespace (newline) in format + assert!(Utc.datetime_from_str("Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S\n").is_err()); // leading space in data assert!(Utc.datetime_from_str(" Aug 09 2013 23:54:35", "%b %d %Y %H:%M:%S").is_err()); // trailing space in data @@ -447,9 +458,8 @@ fn test_datetime_parse_from_str() { assert!( DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00\n", "%b %d %Y %H:%M:%S %z").is_err() ); - assert_eq!( - DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00\n", "%b %d %Y %H:%M:%S %z "), - Ok(dt), + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:00\n", "%b %d %Y %H:%M:%S %z ").is_err() ); // trailing colon assert!( @@ -620,9 +630,8 @@ fn test_datetime_parse_from_str() { DateTime::parse_from_str("Aug 09 2013 23:54:35 -09:", "%b %d %Y %H:%M:%S %#z"), Ok(dt), ); - assert_eq!( - DateTime::parse_from_str("Aug 09 2013 23:54:35 -09: ", "%b %d %Y %H:%M:%S %#z "), - Ok(dt), + assert!( + DateTime::parse_from_str("Aug 09 2013 23:54:35 -09: ", "%b %d %Y %H:%M:%S %#z ").is_err(), ); assert_eq!( DateTime::parse_from_str("Aug 09 2013 23:54:35+-09", "%b %d %Y %H:%M:%S+%#z"), @@ -632,13 +641,11 @@ fn test_datetime_parse_from_str() { DateTime::parse_from_str("Aug 09 2013 23:54:35--09", "%b %d %Y %H:%M:%S-%#z"), Ok(dt), ); - assert_eq!( - DateTime::parse_from_str("Aug 09 2013 -09:00 23:54:35", "%b %d %Y %#z%H:%M:%S"), - Ok(dt), + assert!( + DateTime::parse_from_str("Aug 09 2013 -09:00 23:54:35", "%b %d %Y %#z%H:%M:%S").is_err(), ); - assert_eq!( - 
DateTime::parse_from_str("Aug 09 2013 -0900 23:54:35", "%b %d %Y %#z%H:%M:%S"), - Ok(dt), + assert!( + DateTime::parse_from_str("Aug 09 2013 -0900 23:54:35", "%b %d %Y %#z%H:%M:%S").is_err(), ); assert_eq!( DateTime::parse_from_str("Aug 09 2013 -090023:54:35", "%b %d %Y %#z%H:%M:%S"), diff --git a/src/format/mod.rs b/src/format/mod.rs index ff0363bb94..7c541f015c 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -218,27 +218,27 @@ pub enum Fixed { /// /// It does not support parsing, its use in the parser is an immediate failure. TimezoneName, - /// Offset from the local time to UTC (`+09:00` or `-04:00` or `+00:00`). + /// Offset from the local time to UTC (`+09:00` or `-0400` or `+00:00`). /// - /// In the parser, the colon can be omitted and/or surrounded with any amount of whitespace. + /// In the parser, the colon may be omitted. /// The offset is limited from `-24:00` to `+24:00`, /// which is the same as [`FixedOffset`](../offset/struct.FixedOffset.html)'s range. TimezoneOffsetColon, /// Offset from the local time to UTC with seconds (`+09:00:00` or `-04:00:00` or `+00:00:00`). /// - /// In the parser, the colon can be omitted and/or surrounded with any amount of whitespace. + /// In the parser, the colon may be omitted. /// The offset is limited from `-24:00:00` to `+24:00:00`, /// which is the same as [`FixedOffset`](../offset/struct.FixedOffset.html)'s range. TimezoneOffsetDoubleColon, /// Offset from the local time to UTC without minutes (`+09` or `-04` or `+00`). /// - /// In the parser, the colon can be omitted and/or surrounded with any amount of whitespace. + /// In the parser, the colon may be omitted. /// The offset is limited from `-24` to `+24`, /// which is the same as [`FixedOffset`](../offset/struct.FixedOffset.html)'s range. TimezoneOffsetTripleColon, - /// Offset from the local time to UTC (`+09:00` or `-04:00` or `Z`). + /// Offset from the local time to UTC (`+09:00` or `-0400` or `Z`). 
/// - /// In the parser, the colon can be omitted and/or surrounded with any amount of whitespace, + /// In the parser, the colon may be omitted, /// and `Z` can be either in upper case or in lower case. /// The offset is limited from `-24:00` to `+24:00`, /// which is the same as [`FixedOffset`](../offset/struct.FixedOffset.html)'s range. diff --git a/src/format/parse.rs b/src/format/parse.rs index 7a26fefc14..d4259cd40a 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -42,6 +42,10 @@ fn set_weekday_with_number_from_monday(p: &mut Parsed, v: i64) -> ParseResult<() }) } +/// Parse an RFC 2822 format datetime +/// e.g. `Fri, 21 Nov 1997 09:55:06 -0600` +/// +/// This function allows arbitrary intermixed whitespace per RFC 2822 appendix A.5 fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a str, ())> { macro_rules! try_consume { ($e:expr) => {{ @@ -237,7 +241,7 @@ fn parse_rfc3339<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st /// /// - Padding-agnostic (for numeric items). /// The [`Pad`](./enum.Pad.html) field is completely ignored, -/// so one can prepend any number of whitespace then any number of zeroes before numbers. +/// so one can prepend any number of zeroes before numbers. /// /// - (Still) obeying the intrinsic parsing width. This allows, for example, parsing `HHMMSS`. 
pub fn parse<'a, I, B>(parsed: &mut Parsed, s: &str, items: I) -> ParseResult<()> @@ -292,13 +296,37 @@ where s = &s[prefix.len()..]; } - Item::Space(_) => { - s = s.trim_left(); + Item::Space(item_space) => { + for item_c in item_space.chars() { + let c = match s.chars().next() { + Some(c_) => c_, + None => { + return Err((s, TOO_SHORT)); + } + }; + if item_c != c { + return Err((s, INVALID)); + } + // advance `s` forward 1 char + s = scan::s_next(s); + } } #[cfg(any(feature = "alloc", feature = "std", test))] - Item::OwnedSpace(_) => { - s = s.trim_left(); + Item::OwnedSpace(ref item_space) => { + for item_c in item_space.chars() { + let c = match s.chars().next() { + Some(c_) => c_, + None => { + return Err((s, TOO_SHORT)); + } + }; + if item_c != c { + return Err((s, INVALID)); + } + // advance `s` forward 1 char + s = scan::s_next(s); + } } Item::Numeric(ref spec, ref _pad) => { @@ -331,7 +359,6 @@ where Internal(ref int) => match int._dummy {}, }; - s = s.trim_left(); let v = if signed { if s.starts_with('-') { let v = try_consume!(scan::number(&s[1..], 1, usize::MAX)); @@ -424,27 +451,24 @@ where | &TimezoneOffsetDoubleColon | &TimezoneOffsetTripleColon | &TimezoneOffset => { - let offset = try_consume!(scan::timezone_offset( - s.trim_left(), - scan::colon_or_space - )); + s = scan::space1(s); + let offset = try_consume!(scan::timezone_offset(s, scan::colon_or_space)); parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?; } &TimezoneOffsetColonZ | &TimezoneOffsetZ => { - let offset = try_consume!(scan::timezone_offset_zulu( - s.trim_left(), - scan::colon_or_space - )); + s = scan::space1(s); + let offset = + try_consume!(scan::timezone_offset_zulu(s, scan::colon_or_space)); parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?; } + &Internal(InternalFixed { val: InternalInternal::TimezoneOffsetPermissive, }) => { - let offset = try_consume!(scan::timezone_offset_permissive( - s.trim_left(), - scan::colon_or_space - )); + s = scan::space1(s); + let 
offset = + try_consume!(scan::timezone_offset_permissive(s, scan::colon_or_space)); parsed.set_offset(i64::from(offset)).map_err(|e| (s, e))?; } @@ -468,15 +492,13 @@ where } /// Accepts a relaxed form of RFC3339. -/// A space or a 'T' are acepted as the separator between the date and time -/// parts. Additional spaces are allowed between each component. +/// A space or a 'T' are accepted as the separator between the date and time +/// parts. /// -/// All of these examples are equivalent: /// ``` /// # use chrono::{DateTime, offset::FixedOffset}; -/// "2012-12-12T12:12:12Z".parse::>(); -/// "2012-12-12 12:12:12Z".parse::>(); -/// "2012- 12-12T12: 12:12Z".parse::>(); +/// "2000-01-02T03:04:05Z".parse::>(); +/// "2000-01-02 03:04:05Z".parse::>(); /// ``` impl str::FromStr for DateTime { type Err = ParseError; @@ -484,25 +506,19 @@ impl str::FromStr for DateTime { fn from_str(s: &str) -> ParseResult> { const DATE_ITEMS: &[Item<'static>] = &[ Item::Numeric(Numeric::Year, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Month, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Day, Pad::Zero), ]; const TIME_ITEMS: &[Item<'static>] = &[ Item::Numeric(Numeric::Hour, Pad::Zero), - Item::Space(""), Item::Literal(":"), Item::Numeric(Numeric::Minute, Pad::Zero), - Item::Space(""), Item::Literal(":"), Item::Numeric(Numeric::Second, Pad::Zero), Item::Fixed(Fixed::Nanosecond), - Item::Space(""), Item::Fixed(Fixed::TimezoneOffsetZ), - Item::Space(""), ]; let mut parsed = Parsed::new(); @@ -524,7 +540,6 @@ impl str::FromStr for DateTime { #[cfg(test)] #[test] fn test_parse() { - use super::IMPOSSIBLE; use super::*; // workaround for Rust issue #22255 @@ -556,19 +571,34 @@ fn test_parse() { // whitespaces check!("", [sp!("")]; ); - check!(" ", [sp!("")]; ); - check!("\t", [sp!("")]; ); - check!(" \n\r \n", [sp!("")]; ); check!(" ", [sp!(" ")]; ); check!(" ", [sp!(" ")]; ); + check!(" ", [sp!(" ")]; ); + check!(" ", [sp!("")]; 
TOO_LONG); + check!(" ", [sp!(" ")]; TOO_LONG); + check!(" ", [sp!(" ")]; TOO_LONG); + check!(" ", [sp!(" ")]; TOO_LONG); + check!("", [sp!(" ")]; TOO_SHORT); + check!(" ", [sp!(" ")]; TOO_SHORT); + check!(" ", [sp!(" ")]; TOO_SHORT); + check!(" ", [sp!(" "), sp!(" ")]; TOO_SHORT); + check!(" ", [sp!(" "), sp!(" ")]; TOO_SHORT); check!(" ", [sp!(" "), sp!(" ")]; ); check!(" ", [sp!(" "), sp!(" ")]; ); check!(" ", [sp!(" "), sp!(" ")]; ); check!(" ", [sp!(" "), sp!(" "), sp!(" ")]; ); + check!("\t", [sp!("")]; TOO_LONG); + check!(" \n\r \n", [sp!("")]; TOO_LONG); check!("\t", [sp!("\t")]; ); + check!("\t", [sp!(" ")]; INVALID); + check!(" ", [sp!("\t")]; INVALID); check!("\t\r", [sp!("\t\r")]; ); check!("\t\r ", [sp!("\t\r ")]; ); + check!("\t \r", [sp!("\t \r")]; ); + check!(" \t\r", [sp!(" \t\r")]; ); check!(" \n\r \n", [sp!(" \n\r \n")]; ); + check!(" \t\n", [sp!(" \t")]; TOO_LONG); + check!(" \n\t", [sp!(" \t\n")]; INVALID); check!("\u{2002}", [sp!("\u{2002}")]; ); // most unicode whitespace characters check!( @@ -584,16 +614,13 @@ fn test_parse() { ]; ); check!("a", [sp!("")]; TOO_LONG); - check!("a", [sp!(" ")]; TOO_LONG); - // a Space containing a literal cannot match a literal - check!("a", [sp!("a")]; TOO_LONG); + check!("a", [sp!(" ")]; INVALID); + // a Space containing a literal can match a literal, but this should not be done + check!("a", [sp!("a")]; ); check!("abc", [sp!("")]; TOO_LONG); - check!(" ", [sp!(" ")]; ); - check!(" \t\n", [sp!(" \t")]; ); - check!("", [sp!(" ")]; ); - check!(" ", [sp!(" ")]; ); - check!(" ", [sp!(" ")]; ); - check!(" ", [sp!(" "), sp!(" ")]; ); + check!("abc", [sp!(" ")]; INVALID); + check!(" abc", [sp!("")]; TOO_LONG); + check!(" abc", [sp!(" ")]; TOO_LONG); // `\u{0363}` is combining diacritic mark "COMBINING LATIN SMALL LETTER A" @@ -634,7 +661,7 @@ fn test_parse() { // check!("x y", [lit!("x"), lit!("y")]; INVALID); check!("xy", [lit!("x"), sp!(""), lit!("y")]; ); - check!("x y", [lit!("x"), sp!(""), lit!("y")]; ); + 
check!("x y", [lit!("x"), sp!(""), lit!("y")]; INVALID); check!("x y", [lit!("x"), sp!(" "), lit!("y")]; ); // whitespaces + literals @@ -655,7 +682,7 @@ fn test_parse() { check!("2015", [num!(Year)]; year: 2015); check!("0000", [num!(Year)]; year: 0); check!("9999", [num!(Year)]; year: 9999); - check!(" \t987", [num!(Year)]; year: 987); + check!(" \t987", [num!(Year)]; INVALID); check!(" \t987", [sp!(" \t"), num!(Year)]; year: 987); check!(" \t987🤠", [sp!(" \t"), num!(Year), lit!("🤠")]; year: 987); check!("987🤠", [num!(Year), lit!("🤠")]; year: 987); @@ -667,9 +694,9 @@ fn test_parse() { check!("12345", [nums!(Year), lit!("5")]; year: 1234); check!("12345", [num0!(Year), lit!("5")]; year: 1234); check!("12341234", [num!(Year), num!(Year)]; year: 1234); - check!("1234 1234", [num!(Year), num!(Year)]; year: 1234); + check!("1234 1234", [num!(Year), num!(Year)]; INVALID); check!("1234 1234", [num!(Year), sp!(" "), num!(Year)]; year: 1234); - check!("1234 1235", [num!(Year), num!(Year)]; IMPOSSIBLE); + check!("1234 1235", [num!(Year), num!(Year)]; INVALID); check!("1234 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); check!("1234x1234", [num!(Year), lit!("x"), num!(Year)]; year: 1234); check!("1234 x 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); @@ -685,8 +712,10 @@ fn test_parse() { check!("+0042", [num!(Year)]; year: 42); check!("-42195", [num!(Year)]; year: -42195); check!("+42195", [num!(Year)]; year: 42195); - check!(" -42195", [num!(Year)]; year: -42195); - check!(" +42195", [num!(Year)]; year: 42195); + check!(" -42195", [num!(Year)]; INVALID); + check!(" +42195", [num!(Year)]; INVALID); + check!(" -42195", [num!(Year)]; INVALID); + check!(" +42195", [num!(Year)]; INVALID); check!("-42195 ", [num!(Year)]; TOO_LONG); check!("+42195 ", [num!(Year)]; TOO_LONG); check!(" - 42", [num!(Year)]; INVALID); @@ -702,7 +731,8 @@ fn test_parse() { check!("345", [num!(Ordinal)]; ordinal: 345); check!("+345", [num!(Ordinal)]; INVALID); check!("-345", 
[num!(Ordinal)]; INVALID); - check!(" 345", [num!(Ordinal)]; ordinal: 345); + check!(" 345", [num!(Ordinal)]; INVALID); + check!("345 ", [num!(Ordinal)]; TOO_LONG); check!(" 345", [sp!(" "), num!(Ordinal)]; ordinal: 345); check!("345 ", [num!(Ordinal), sp!(" ")]; ordinal: 345); check!("345🤠 ", [num!(Ordinal), lit!("🤠"), sp!(" ")]; ordinal: 345); @@ -715,21 +745,27 @@ fn test_parse() { check!(" -345", [sp!(" "), num!(Ordinal)]; INVALID); // various numeric fields + check!("1234 5678", [num!(Year), num!(IsoYear)]; INVALID); check!("1234 5678", - [num!(Year), num!(IsoYear)]; + [num!(Year), sp!(" "), num!(IsoYear)]; year: 1234, isoyear: 5678); check!("12 34 56 78", [num!(YearDiv100), num!(YearMod100), num!(IsoYearDiv100), num!(IsoYearMod100)]; + INVALID); + check!("12 34🤠56 78", + [num!(YearDiv100), sp!(" "), num!(YearMod100), + lit!("🤠"), num!(IsoYearDiv100), sp!(" "), num!(IsoYearMod100)]; year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78); check!("1 2 3 4 5 6", - [num!(Month), num!(Day), num!(WeekFromSun), num!(WeekFromMon), num!(IsoWeek), - num!(NumDaysFromSun)]; + [num!(Month), sp!(" "), num!(Day), sp!(" "), num!(WeekFromSun), sp!(" "), + num!(WeekFromMon), sp!(" "), num!(IsoWeek), sp!(" "), num!(NumDaysFromSun)]; month: 1, day: 2, week_from_sun: 3, week_from_mon: 4, isoweek: 5, weekday: Weekday::Sat); check!("7 89 01", - [num!(WeekdayFromMon), num!(Ordinal), num!(Hour12)]; + [num!(WeekdayFromMon), sp!(" "), num!(Ordinal), sp!(" "), num!(Hour12)]; weekday: Weekday::Sun, ordinal: 89, hour_mod_12: 1); check!("23 45 6 78901234 567890123", - [num!(Hour), num!(Minute), num!(Second), num!(Nanosecond), num!(Timestamp)]; + [num!(Hour), sp!(" "), num!(Minute), sp!(" "), num!(Second), sp!(" "), + num!(Nanosecond), sp!(" "), num!(Timestamp)]; hour_div_12: 1, hour_mod_12: 11, minute: 45, second: 6, nanosecond: 78_901_234, timestamp: 567_890_123); @@ -890,11 +926,11 @@ fn test_parse() { check!("+12:34:56:", [fix!(TimezoneOffset)]; TOO_LONG); 
check!("12:34", [fix!(TimezoneOffset)]; INVALID); check!("12:34:56", [fix!(TimezoneOffset)]; INVALID); - check!("+12::34", [fix!(TimezoneOffset)]; offset: 45_240); - check!("+12: :34", [fix!(TimezoneOffset)]; offset: 45_240); - check!("+12:::34", [fix!(TimezoneOffset)]; offset: 45_240); - check!("+12::::34", [fix!(TimezoneOffset)]; offset: 45_240); - check!("+12::34", [fix!(TimezoneOffset)]; offset: 45_240); + check!("+12::34", [fix!(TimezoneOffset)]; INVALID); + check!("+12: :34", [fix!(TimezoneOffset)]; INVALID); + check!("+12:::34", [fix!(TimezoneOffset)]; INVALID); + check!("+12::::34", [fix!(TimezoneOffset)]; INVALID); + check!("+12::34", [fix!(TimezoneOffset)]; INVALID); check!("+12:34:56", [fix!(TimezoneOffset)]; TOO_LONG); check!("+12:3456", [fix!(TimezoneOffset)]; TOO_LONG); check!("+1234:56", [fix!(TimezoneOffset)]; TOO_LONG); @@ -915,17 +951,17 @@ fn test_parse() { check!("+12:34 ", [fix!(TimezoneOffset)]; TOO_LONG); check!(" +12:34", [fix!(TimezoneOffset)]; offset: 45_240); check!(" -12:34", [fix!(TimezoneOffset)]; offset: -45_240); - check!(" +12:34", [fix!(TimezoneOffset)]; offset: 45_240); - check!(" -12:34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("\t -12:34", [fix!(TimezoneOffset)]; offset: -45_240); + check!(" +12:34", [fix!(TimezoneOffset)]; INVALID); + check!(" -12:34", [fix!(TimezoneOffset)]; INVALID); + check!("\t -12:34", [fix!(TimezoneOffset)]; INVALID); check!("-12: 34", [fix!(TimezoneOffset)]; offset: -45_240); check!("-12 :34", [fix!(TimezoneOffset)]; offset: -45_240); check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12: 34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12 :34", [fix!(TimezoneOffset)]; offset: -45_240); - check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -45_240); + check!("-12 : 34", [fix!(TimezoneOffset)]; INVALID); + check!("-12 : 34", 
[fix!(TimezoneOffset)]; INVALID); + check!("-12: 34", [fix!(TimezoneOffset)]; INVALID); + check!("-12 :34", [fix!(TimezoneOffset)]; INVALID); + check!("-12 : 34", [fix!(TimezoneOffset)]; INVALID); check!("12:34 ", [fix!(TimezoneOffset)]; INVALID); check!(" 12:34", [fix!(TimezoneOffset)]; INVALID); check!("", [fix!(TimezoneOffset)]; TOO_SHORT); @@ -945,7 +981,7 @@ fn test_parse() { check!("Z", [fix!(TimezoneOffset)]; INVALID); check!("#Z", [fix!(TimezoneOffset)]; INVALID); check!(":Z", [fix!(TimezoneOffset)]; INVALID); - check!("+Z", [fix!(TimezoneOffset)]; TOO_SHORT); + check!("+Z", [fix!(TimezoneOffset)]; INVALID); check!("+:Z", [fix!(TimezoneOffset)]; INVALID); check!("+Z:", [fix!(TimezoneOffset)]; INVALID); check!("z", [fix!(TimezoneOffset)]; INVALID); @@ -994,20 +1030,20 @@ fn test_parse() { check!("+12: 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); check!("+12 :34", [fix!(TimezoneOffsetColon)]; offset: 45_240); check!("+12 : 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12::34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12: :34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12:::34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12::::34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("+12::34", [fix!(TimezoneOffsetColon)]; offset: 45_240); + check!("+12 : 34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12 : 34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12 : 34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12::34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12: :34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12:::34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12::::34", [fix!(TimezoneOffsetColon)]; INVALID); + check!("+12::34", 
[fix!(TimezoneOffsetColon)]; INVALID); check!("#1234", [fix!(TimezoneOffsetColon)]; INVALID); check!("#12:34", [fix!(TimezoneOffsetColon)]; INVALID); check!("+12:34 ", [fix!(TimezoneOffsetColon)]; TOO_LONG); check!(" +12:34", [fix!(TimezoneOffsetColon)]; offset: 45_240); check!("\t+12:34", [fix!(TimezoneOffsetColon)]; offset: 45_240); - check!("\t\t+12:34", [fix!(TimezoneOffsetColon)]; offset: 45_240); + check!("\t\t+12:34", [fix!(TimezoneOffsetColon)]; INVALID); check!("12:34 ", [fix!(TimezoneOffsetColon)]; INVALID); check!(" 12:34", [fix!(TimezoneOffsetColon)]; INVALID); check!("", [fix!(TimezoneOffsetColon)]; TOO_SHORT); @@ -1019,7 +1055,7 @@ fn test_parse() { check!("Z", [fix!(TimezoneOffsetColon)]; INVALID); check!("#Z", [fix!(TimezoneOffsetColon)]; INVALID); check!(":Z", [fix!(TimezoneOffsetColon)]; INVALID); - check!("+Z", [fix!(TimezoneOffsetColon)]; TOO_SHORT); + check!("+Z", [fix!(TimezoneOffsetColon)]; INVALID); check!("+:Z", [fix!(TimezoneOffsetColon)]; INVALID); check!("+Z:", [fix!(TimezoneOffsetColon)]; INVALID); check!("z", [fix!(TimezoneOffsetColon)]; INVALID); @@ -1067,15 +1103,15 @@ fn test_parse() { check!("+12:34:56:", [fix!(TimezoneOffsetZ)]; TOO_LONG); check!("+12:34:56:7", [fix!(TimezoneOffsetZ)]; TOO_LONG); check!("+12:34:56:78", [fix!(TimezoneOffsetZ)]; TOO_LONG); - check!("+12::34", [fix!(TimezoneOffsetZ)]; offset: 45_240); + check!("+12::34", [fix!(TimezoneOffsetZ)]; INVALID); check!("+12:3456", [fix!(TimezoneOffsetZ)]; TOO_LONG); check!("+1234:56", [fix!(TimezoneOffsetZ)]; TOO_LONG); check!("+12: 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); check!("+12 :34", [fix!(TimezoneOffsetZ)]; offset: 45_240); check!("+12 : 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); - check!("+12 : 34", [fix!(TimezoneOffsetZ)]; offset: 45_240); + check!("+12 : 34", [fix!(TimezoneOffsetZ)]; INVALID); + check!("+12 : 34", 
[fix!(TimezoneOffsetZ)]; INVALID); + check!("+12 : 34", [fix!(TimezoneOffsetZ)]; INVALID); check!("12:34 ", [fix!(TimezoneOffsetZ)]; INVALID); check!(" 12:34", [fix!(TimezoneOffsetZ)]; INVALID); check!("+12:34 ", [fix!(TimezoneOffsetZ)]; TOO_LONG); @@ -1094,12 +1130,12 @@ fn test_parse() { check!("#Z", [fix!(TimezoneOffsetZ)]; INVALID); check!(":Z", [fix!(TimezoneOffsetZ)]; INVALID); check!(":z", [fix!(TimezoneOffsetZ)]; INVALID); - check!("+Z", [fix!(TimezoneOffsetZ)]; TOO_SHORT); - check!("-Z", [fix!(TimezoneOffsetZ)]; TOO_SHORT); + check!("+Z", [fix!(TimezoneOffsetZ)]; INVALID); + check!("-Z", [fix!(TimezoneOffsetZ)]; INVALID); check!("+Z:", [fix!(TimezoneOffsetZ)]; INVALID); check!(" :Z", [fix!(TimezoneOffsetZ)]; INVALID); - check!(" +Z", [fix!(TimezoneOffsetZ)]; TOO_SHORT); - check!(" -Z", [fix!(TimezoneOffsetZ)]; TOO_SHORT); + check!(" +Z", [fix!(TimezoneOffsetZ)]; INVALID); + check!(" -Z", [fix!(TimezoneOffsetZ)]; INVALID); check!("+:Z", [fix!(TimezoneOffsetZ)]; INVALID); check!("Y", [fix!(TimezoneOffsetZ)]; INVALID); check!("Zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0); @@ -1149,18 +1185,18 @@ fn test_parse() { check!("+12 :34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); check!("+12: 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); check!("+12 : 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12 :34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12: 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12 : 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12::34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12 ::34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12: :34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12:: 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12 ::34", 
[internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12: :34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12:: 34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12:::34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); - check!("+12::::34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240); + check!("+12 :34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12: 34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12 : 34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12 ::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12: :34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12:: 34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12 ::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12: :34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12:: 34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12:::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("+12::::34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!("12:34 ", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!(" 12:34", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!("+12:34 ", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG); @@ -1181,12 +1217,12 @@ fn test_parse() { check!("#Z", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!(":Z", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!(":z", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); - check!("+Z", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT); - check!("-Z", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT); + check!("+Z", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!("-Z", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); 
check!("+Z:", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!(" :Z", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); - check!(" +Z", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT); - check!(" -Z", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT); + check!(" +Z", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); + check!(" -Z", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!("+:Z", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); check!("Y", [internal_fix!(TimezoneOffsetPermissive)]; INVALID); diff --git a/src/format/scan.rs b/src/format/scan.rs index 7334a3b2ed..1d2ec9d9c3 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -198,9 +198,67 @@ pub(super) fn space(s: &str) -> ParseResult<&str> { } } -/// Consumes any number (including zero) of colon or spaces. +/// Returns the slice remaining after the first char. +/// If `s` has one or zero chars then return an empty slice. +pub(super) fn s_next(s: &str) -> &str { + match s.char_indices().nth(1) { + Some((offset, _)) => &s[offset..], + None => { + // one or zero chars in `s`, return empty string + &s[s.len()..] + } + } +} + +/// If the first `char` is whitespace then consume it and return the rest of `s`. +/// Else return `s` unchanged. +pub(super) fn space1(s: &str) -> &str { + match s.chars().next() { + Some(c) if c.is_whitespace() => s_next(s), + Some(_) | None => s, + } +} + +/// Allow a colon with possible one-character whitespace padding. +/// Consumes zero or one of these leading patterns: +/// `":"`, `" "`, `" :"`, `": "`, or `" : "`. 
pub(super) fn colon_or_space(s: &str) -> ParseResult<&str> { - Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace())) + let c0_ = match s.chars().next() { + Some(c) => c, + None => { + return Ok(s); + } + }; + if c0_ != ':' && !c0_.is_whitespace() { + return Ok(s); + } + let c1_ = s.chars().nth(1); + match (c0_, c1_) { + (c0, None) if c0 == ':' || c0.is_whitespace() => { + return Ok(s_next(s)); + } + (c0, Some(c1)) if c0 == ':' && c1.is_whitespace() => { + return Ok(s_next(s_next(s))); + } + (c0, Some(c1)) if c0 == ':' && !c1.is_whitespace() => { + return Ok(s_next(s)); + } + (c0, Some(c1)) if c0.is_whitespace() && (!c1.is_whitespace() && c1 != ':') => { + return Ok(s_next(s)); + } + _ => {} + } + let c2_ = s.chars().nth(2); + match (c0_, c1_, c2_) { + (c0, Some(c1), None) if c0.is_whitespace() && c1 == ':' => Ok(s_next(s_next(s))), + (c0, Some(c1), Some(c2)) if c0.is_whitespace() && c1 == ':' && !c2.is_whitespace() => { + Ok(s_next(s_next(s))) + } + (c0, Some(c1), Some(c2)) if c0.is_whitespace() && c1 == ':' && c2.is_whitespace() => { + Ok(s_next(s_next(s_next(s)))) + } + _ => Ok(s), + } } /// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible. @@ -238,6 +296,16 @@ where }; s = &s[1..]; + // special check for `Z` to return more accurate error `INVALID`. + // Otherwise the upcoming match for digits might return error `TOO_SHORT` + // which is confusing for the user. + match s.as_bytes().first() { + Some(&b'Z') | Some(&b'z') => { + return Err(INVALID); + } + _ => {} + } + // hours (00--99) let hours = match digits(s)? 
{ (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')), @@ -413,3 +481,68 @@ fn test_rfc2822_comments() { ); } } + +#[test] +fn test_space() { + assert_eq!(space(""), Err(TOO_SHORT)); + assert_eq!(space(" "), Ok("")); + assert_eq!(space(" \t"), Ok("")); + assert_eq!(space(" \ta"), Ok("a")); + assert_eq!(space(" \ta "), Ok("a ")); + assert_eq!(space("a"), Err(INVALID)); + assert_eq!(space("a "), Err(INVALID)); +} + +#[test] +fn test_s_next() { + assert_eq!(s_next(""), ""); + assert_eq!(s_next(" "), ""); + assert_eq!(s_next("a"), ""); + assert_eq!(s_next("ab"), "b"); + assert_eq!(s_next("abc"), "bc"); + assert_eq!(s_next("😾b"), "b"); + assert_eq!(s_next("a😾"), "😾"); + assert_eq!(s_next("😾bc"), "bc"); + assert_eq!(s_next("a😾c"), "😾c"); +} + +#[test] +fn test_space1() { + assert_eq!(space1(""), ""); + assert_eq!(space1(" "), ""); + assert_eq!(space1("\t"), ""); + assert_eq!(space1("\t\t"), "\t"); + assert_eq!(space1(" "), " "); + assert_eq!(space1("a"), "a"); + assert_eq!(space1("a "), "a "); + assert_eq!(space1("ab"), "ab"); + assert_eq!(space1("😼"), "😼"); + assert_eq!(space1("😼b"), "😼b"); +} + +#[test] +fn test_colon_or_space() { + assert_eq!(colon_or_space(""), Ok("")); + assert_eq!(colon_or_space(" "), Ok("")); + assert_eq!(colon_or_space(":"), Ok("")); + assert_eq!(colon_or_space(" :"), Ok("")); + assert_eq!(colon_or_space(": "), Ok("")); + assert_eq!(colon_or_space(" : "), Ok("")); + assert_eq!(colon_or_space(" :: "), Ok(": ")); + assert_eq!(colon_or_space("😸"), Ok("😸")); + assert_eq!(colon_or_space("😸😸"), Ok("😸😸")); + assert_eq!(colon_or_space("😸:"), Ok("😸:")); + assert_eq!(colon_or_space("😸 "), Ok("😸 ")); + assert_eq!(colon_or_space(" 😸"), Ok("😸")); + assert_eq!(colon_or_space(":😸"), Ok("😸")); + assert_eq!(colon_or_space(":😸 "), Ok("😸 ")); + assert_eq!(colon_or_space(" :😸"), Ok("😸")); + assert_eq!(colon_or_space(" :😸 "), Ok("😸 ")); + assert_eq!(colon_or_space(" :😸:"), Ok("😸:")); + assert_eq!(colon_or_space(": 😸"), Ok("😸")); + 
assert_eq!(colon_or_space(": 😸"), Ok(" 😸")); + assert_eq!(colon_or_space(": :😸"), Ok(":😸")); + assert_eq!(colon_or_space(" : 😸"), Ok("😸")); + assert_eq!(colon_or_space(" ::😸"), Ok(":😸")); + assert_eq!(colon_or_space(" :: 😸"), Ok(": 😸")); +} diff --git a/src/format/strftime.rs b/src/format/strftime.rs index fb131bd71b..5f8873037c 100644 --- a/src/format/strftime.rs +++ b/src/format/strftime.rs @@ -466,28 +466,45 @@ impl<'a> Iterator for StrftimeItems<'a> { } } - // the next item is space + // whitespace Some(c) if c.is_whitespace() => { - // `%` is not a whitespace, so `c != '%'` is redundant - let nextspec = self - .remainder - .find(|c: char| !c.is_whitespace()) - .unwrap_or(self.remainder.len()); - assert!(nextspec > 0); - let item = sp!(&self.remainder[..nextspec]); - self.remainder = &self.remainder[nextspec..]; + let ws = self.remainder; + let mut end: usize = 0; + for (offset, c_) in self.remainder.char_indices() { + if !c_.is_whitespace() { + break; + } + // advance `end` byte offset by 1 char + end = offset; + } + // get the offset of the last char too + end += match &self.remainder[end..].char_indices().nth(1) { + Some((offset, _c)) => *offset, + None => self.remainder[end..].len(), + }; + self.remainder = &self.remainder[end..]; + let item = sp!(&ws[..end]); Some(item) } - // the next item is literal - _ => { - let nextspec = self - .remainder - .find(|c: char| c.is_whitespace() || c == '%') - .unwrap_or(self.remainder.len()); - assert!(nextspec > 0); - let item = lit!(&self.remainder[..nextspec]); - self.remainder = &self.remainder[nextspec..]; + // literals + Some(_) => { + let ws = self.remainder; + let mut end: usize = 0; + for (offset, c) in self.remainder.char_indices() { + if c.is_whitespace() || c == '%' { + break; + } + // advance `end` byte offset by 1 char + end = offset; + } + // get the offset of the last char too + end += match &self.remainder[end..].char_indices().nth(1) { + Some((offset, _)) => *offset, + None => 
self.remainder[end..].len(), + }; + self.remainder = &self.remainder[end..]; + let item = lit!(&ws[..end]); Some(item) } } diff --git a/src/naive/date.rs b/src/naive/date.rs index ca09270b87..46aa1ac817 100644 --- a/src/naive/date.rs +++ b/src/naive/date.rs @@ -2031,13 +2031,10 @@ impl str::FromStr for NaiveDate { fn from_str(s: &str) -> ParseResult { const ITEMS: &[Item<'static>] = &[ Item::Numeric(Numeric::Year, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Month, Pad::Zero), - Item::Space(""), Item::Literal("-"), Item::Numeric(Numeric::Day, Pad::Zero), - Item::Space(""), ]; let mut parsed = Parsed::new(); @@ -2737,14 +2734,14 @@ mod tests { // valid cases let valid = [ "-0000000123456-1-2", - " -123456 - 1 - 2 ", + "-123456-1-2", "-12345-1-2", "-1234-12-31", "-7-6-5", "350-2-28", "360-02-29", "0360-02-29", - "2015-2 -18", + "2015-2-18", "2015-02-18", "+70-2-18", "+70000-2-18", @@ -2780,21 +2777,30 @@ mod tests { // some invalid cases // since `ParseErrorKind` is private, all we can do is to check if there was an error let invalid = [ - "", // empty - "x", // invalid - "Fri, 09 Aug 2013 GMT", // valid date, wrong format - "Sat Jun 30 2012", // valid date, wrong format - "1441497364.649", // valid datetime, wrong format - "+1441497364.649", // valid datetime, wrong format - "+1441497364", // valid datetime, wrong format - "2014/02/03", // valid date, wrong format - "2014", // datetime missing data - "2014-01", // datetime missing data - "2014-01-00", // invalid day - "2014-11-32", // invalid day - "2014-13-01", // invalid month - "2014-13-57", // invalid month, day - "9999999-9-9", // invalid year (out of bounds) + "", // empty + "x", // invalid + "Fri, 09 Aug 2013 GMT", // valid date, wrong format + "Sat Jun 30 2012", // valid date, wrong format + "1441497364.649", // valid datetime, wrong format + "+1441497364.649", // valid datetime, wrong format + "+1441497364", // valid datetime, wrong format + "2014/02/03", // valid date, wrong 
format + "2014", // datetime missing data + "2014-01", // datetime missing data + "2014-01-00", // invalid day + "2014-11-32", // invalid day + "2014-13-01", // invalid month + "2014-13-57", // invalid month, day + "2001 -02-03", // space after year + "2001- 02-03", // space before month + "2001 - 02-03", // space around year-month divider + "2001-02 -03", // space after month + "2001-02- 03", // space before day + "2001-02 - 03", // space around month-day divider + "2001-02-03 ", // trailing space + " 2001-02-03", // leading space + " -123456 - 1 - 2 ", // many spaces + "9999999-9-9", // invalid year (out of bounds) ]; for &s in &invalid { eprintln!("test_date_from_str invalid {:?}", s); @@ -2810,7 +2816,7 @@ mod tests { Ok(ymd(2014, 5, 7)) ); // ignore time and offset assert_eq!( - NaiveDate::parse_from_str("2015-W06-1=2015-033", "%G-W%V-%u = %Y-%j"), + NaiveDate::parse_from_str("2015-W06-1=2015-033", "%G-W%V-%u=%Y-%j"), Ok(ymd(2015, 2, 2)) ); assert_eq!( diff --git a/src/naive/datetime/tests.rs b/src/naive/datetime/tests.rs index bb894bee06..de11d21a8d 100644 --- a/src/naive/datetime/tests.rs +++ b/src/naive/datetime/tests.rs @@ -119,7 +119,6 @@ fn test_datetime_from_str() { "2015-2-18T23:16:09.153", "-77-02-18T23:16:09", "+82701-05-6T15:9:60.898989898989", - " +82701 - 05 - 6 T 15 : 9 : 60.898989898989 ", ]; for &s in &valid { eprintln!("test_parse_naivedatetime valid {:?}", s); @@ -148,9 +147,9 @@ fn test_datetime_from_str() { // some invalid cases // since `ParseErrorKind` is private, all we can do is to check if there was an error let invalid = [ - "", // empty - "x", // invalid / missing data - "15", // missing data + "", // empty + "x", // invalid / missing data + "15", // missing data "15:8:9", // looks like a time (invalid date) "15-8-9", // looks like a date (invalid) "Fri, 09 Aug 2013 23:54:35 GMT", // valid date, wrong format @@ -162,9 +161,23 @@ fn test_datetime_from_str() { "2015-15-15T15:15:15", // valid date, wrong format "2012-12-12T12:12:12x", 
// bad timezone / trailing literal "2012-123-12T12:12:12", // invalid month + "2012 -12-12T12:12:12", // space after year + "2012 -12-12T12:12:12", // multi space after year + "2012- 12-12T12:12:12", // space before month + "2012- 12-12T12:12:12", // multi space before month + "2012-12-12 T12:12:12", // space after day + "2012-12-12T 12:12:12", // space after date-time divider + "2012-12-12T12 :12:12", // space after hour + "2012-12-12T12 :12:12", // multi space after hour + "2012-12-12T12: 12:12", // space before minute + "2012-12-12T12: 12:12", // multi space before minute + "2012-12-12T12 : 12:12", // space around hour-minute divider + "2012-12-12T12:12:12 ", // trailing space + " 2012-12-12T12:12:12", // leading space "2012-12-12t12:12:12", // bad divider 't' "+ 82701-123-12T12:12:12", // strange year, invalid month "+802701-123-12T12:12:12", // out-of-bound year, invalid month + " +82701 - 05 - 6 T 15 : 9 : 60.898989898989 ", // many spaces ]; for &s in &invalid { eprintln!("test_datetime_from_str invalid {:?}", s); @@ -180,8 +193,12 @@ fn test_datetime_parse_from_str() { NaiveDateTime::parse_from_str("2014-5-7T12:34:56+09:30", "%Y-%m-%dT%H:%M:%S%z"), Ok(ymdhms(2014, 5, 7, 12, 34, 56)) ); // ignore offset + assert!( + // intermixed whitespace + NaiveDateTime::parse_from_str("2015-W06-1 000000", "%G-W%V-%u%H%M%S").is_err() + ); assert_eq!( - NaiveDateTime::parse_from_str("2015-W06-1 000000", "%G-W%V-%u%H%M%S"), + NaiveDateTime::parse_from_str("2015-W06-1 000000", "%G-W%V-%u %H%M%S"), Ok(ymdhms(2015, 2, 2, 0, 0, 0)) ); assert_eq!( diff --git a/src/naive/time/mod.rs b/src/naive/time/mod.rs index 84305bf8f5..7ab0dffb73 100644 --- a/src/naive/time/mod.rs +++ b/src/naive/time/mod.rs @@ -1300,14 +1300,11 @@ impl str::FromStr for NaiveTime { fn from_str(s: &str) -> ParseResult { const ITEMS: &[Item<'static>] = &[ Item::Numeric(Numeric::Hour, Pad::Zero), - Item::Space(""), Item::Literal(":"), Item::Numeric(Numeric::Minute, Pad::Zero), - Item::Space(""), 
Item::Literal(":"), Item::Numeric(Numeric::Second, Pad::Zero), Item::Fixed(Fixed::Nanosecond), - Item::Space(""), ]; let mut parsed = Parsed::new(); diff --git a/src/naive/time/tests.rs b/src/naive/time/tests.rs index dfd9f6a16d..2ed2f1c600 100644 --- a/src/naive/time/tests.rs +++ b/src/naive/time/tests.rs @@ -196,9 +196,6 @@ fn test_date_from_str() { "0:0:0", "0:0:0.0000000", "0:0:0.0000003", - " 4 : 3 : 2.1 ", - " 09:08:07 ", - " 9:8:07 ", "01:02:03", "4:3:2.1", "9:8:7", @@ -266,6 +263,18 @@ fn test_date_from_str() { "1441497364.649", // valid datetime, not a NaiveTime "+1441497364.649", // valid datetime, not a NaiveTime "+1441497364", // valid datetime, not a NaiveTime + "01 :02:03", // space after hour + "01: 02:03", // space before minute + "01 : 02:03", // space around hour-minute divider + "01:02 :03", // space after minute + "01:02: 03", // space before second + "01:02 : 03", // space around minute-second divider + "01:02:03 .456", // space after second + "01:02:03. 456", // space before fraction + "01:02:03 ", // trailing space + "01:02:03.456 ", // trailing space + " 01:02:03", // leading space + " 4 : 3 : 2.1 ", // spaces intermixed throughout "001:02:03", // invalid hour "01:002:03", // invalid minute "01:02:003", // invalid second @@ -293,9 +302,9 @@ fn test_time_parse_from_str() { NaiveTime::parse_from_str("\t\t1259\t\tPM\t", "\t\t%H%M\t\t%P\t"), Ok(hms(12, 59, 0)) ); - assert!(NaiveTime::parse_from_str("12:59 PM", "%H:%M\t%P").is_ok()); - assert!(NaiveTime::parse_from_str("\t\t12:59 PM\t", "\t\t%H:%M\t%P\t").is_ok()); - assert!(NaiveTime::parse_from_str("12:59 PM", "%H:%M %P").is_ok()); + assert!(NaiveTime::parse_from_str("12:59 PM", "%H:%M\t%P").is_err()); + assert!(NaiveTime::parse_from_str("\t\t12:59 PM\t", "\t\t%H:%M\t%P\t").is_err()); + assert!(NaiveTime::parse_from_str("12:59 PM", "%H:%M %P").is_err()); assert!(NaiveTime::parse_from_str("12:3456", "%H:%M:%S").is_err()); }