Skip to content

Commit

Permalink
timezone allows leading MINUS SIGN (U+2212)
Browse files Browse the repository at this point in the history
The sign of a timezone offset may now also be written as MINUS SIGN (U+2212),
in addition to HYPHEN-MINUS (U+002D), as specified by ISO 8601 and RFC 3339.

This does not apply to the RFC 2822 format or to RFC 8536 transition strings.

Issue #835
  • Loading branch information
jtmoon79 committed Jun 9, 2023
1 parent 83272d7 commit 1f3513d
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 12 deletions.
62 changes: 56 additions & 6 deletions src/format/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,8 @@ fn test_parse() {
check!("a", [lit!("a")]; );
check!("+", [lit!("+")]; );
check!("-", [lit!("-")]; );
check!("−", [lit!("−")]; ); // MINUS SIGN (U+2212)
// a Literal may contain whitespace and match whitespace, but this should not be done
check!(" ", [lit!(" ")]; );
check!("aa", [lit!("a")]; TOO_LONG);
check!("A", [lit!("a")]; INVALID);
Expand All @@ -603,6 +605,7 @@ fn test_parse() {
check!("1234", [lit!("1234")]; );
check!("+1234", [lit!("+1234")]; );
check!("-1234", [lit!("-1234")]; );
check!("−1234", [lit!("−1234")]; ); // MINUS SIGN (U+2212)
check!("PST", [lit!("PST")]; );
check!("🤠", [lit!("🤠")]; );
check!("🤠a", [lit!("🤠"), lit!("a")]; );
Expand Down Expand Up @@ -651,11 +654,17 @@ fn test_parse() {
check!("-0042", [num!(Year)]; year: -42);
check!("+0042", [num!(Year)]; year: 42);
check!("-42195", [num!(Year)]; year: -42195);
check!("−42195", [num!(Year)]; INVALID); // MINUS SIGN (U+2212)
check!("+42195", [num!(Year)]; year: 42195);
check!(" -42195", [num!(Year)]; year: -42195);
check!(" +42195", [num!(Year)]; year: 42195);
check!(" - 42", [num!(Year)]; INVALID);
check!(" + 42", [num!(Year)]; INVALID);
check!(" -42195", [sp!(" "), num!(Year)]; year: -42195);
check!(" −42195", [sp!(" "), num!(Year)]; INVALID); // MINUS SIGN (U+2212)
check!(" +42195", [sp!(" "), num!(Year)]; year: 42195);
check!(" - 42", [sp!(" "), num!(Year)]; INVALID);
check!(" + 42", [sp!(" "), num!(Year)]; INVALID);
check!("-", [num!(Year)]; TOO_SHORT);
check!("+", [num!(Year)]; TOO_SHORT);

Expand All @@ -664,6 +673,13 @@ fn test_parse() {
check!("+345", [num!(Ordinal)]; INVALID);
check!("-345", [num!(Ordinal)]; INVALID);
check!(" 345", [num!(Ordinal)]; ordinal: 345);
check!("−345", [num!(Ordinal)]; INVALID); // MINUS SIGN (U+2212)
check!("345 ", [num!(Ordinal)]; TOO_LONG);
check!(" 345", [sp!(" "), num!(Ordinal)]; ordinal: 345);
check!("345 ", [num!(Ordinal), sp!(" ")]; ordinal: 345);
check!("345🤠 ", [num!(Ordinal), lit!("🤠"), sp!(" ")]; ordinal: 345);
check!("345🤠", [num!(Ordinal)]; TOO_LONG);
check!("\u{0363}345", [num!(Ordinal)]; INVALID);
check!(" +345", [num!(Ordinal)]; INVALID);
check!(" -345", [num!(Ordinal)]; INVALID);

Expand Down Expand Up @@ -813,6 +829,7 @@ fn test_parse() {
check!("+12:3", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+12:34", [fix!(TimezoneOffset)]; offset: 45_240);
check!("-12:34", [fix!(TimezoneOffset)]; offset: -45_240);
check!("−12:34", [fix!(TimezoneOffset)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34:", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12:34:5", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12:34:56", [fix!(TimezoneOffset)]; TOO_LONG);
Expand All @@ -832,12 +849,14 @@ fn test_parse() {
check!("+1234:567", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("-00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("−00:00", [fix!(TimezoneOffset)]; offset: 0); // MINUS SIGN (U+2212)
check!("+00:01", [fix!(TimezoneOffset)]; offset: 60);
check!("-00:01", [fix!(TimezoneOffset)]; offset: -60);
check!("+00:30", [fix!(TimezoneOffset)]; offset: 1_800);
check!("-00:30", [fix!(TimezoneOffset)]; offset: -1_800);
check!("+24:00", [fix!(TimezoneOffset)]; offset: 86_400);
check!("-24:00", [fix!(TimezoneOffset)]; offset: -86_400);
check!("−24:00", [fix!(TimezoneOffset)]; offset: -86_400); // MINUS SIGN (U+2212)
check!("+99:59", [fix!(TimezoneOffset)]; offset: 359_940);
check!("-99:59", [fix!(TimezoneOffset)]; offset: -359_940);
check!("+00:60", [fix!(TimezoneOffset)]; OUT_OF_RANGE);
Expand All @@ -847,6 +866,8 @@ fn test_parse() {
check!("+12 34 ", [fix!(TimezoneOffset)]; TOO_LONG);
check!(" +12:34", [fix!(TimezoneOffset)]; offset: 45_240);
check!(" -12:34", [fix!(TimezoneOffset)]; offset: -45_240);
check!(" −12:34", [fix!(TimezoneOffset)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("12:34 ", [fix!(TimezoneOffset)]; INVALID);
check!(" +12:34", [fix!(TimezoneOffset)]; offset: 45_240);
check!(" -12:34", [fix!(TimezoneOffset)]; offset: -45_240);
check!("\t -12:34", [fix!(TimezoneOffset)]; offset: -45_240);
Expand All @@ -858,7 +879,6 @@ fn test_parse() {
check!("-12: 34", [fix!(TimezoneOffset)]; offset: -45_240);
check!("-12 :34", [fix!(TimezoneOffset)]; offset: -45_240);
check!("-12 : 34", [fix!(TimezoneOffset)]; offset: -45_240);
check!("12:34 ", [fix!(TimezoneOffset)]; INVALID);
check!(" 12:34", [fix!(TimezoneOffset)]; INVALID);
check!("", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+", [fix!(TimezoneOffset)]; TOO_SHORT);
Expand All @@ -869,13 +889,16 @@ fn test_parse() {
check!("X12:34", [fix!(TimezoneOffset)]; INVALID);
check!("Z+12:34", [fix!(TimezoneOffset)]; INVALID);
check!("X+12:34", [fix!(TimezoneOffset)]; INVALID);
check!("X−12:34", [fix!(TimezoneOffset)]; INVALID); // MINUS SIGN (U+2212)
check!("🤠+12:34", [fix!(TimezoneOffset)]; INVALID);
check!("+12:34🤠", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12:🤠34", [fix!(TimezoneOffset)]; INVALID);
check!("+1234🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: 45_240);
check!("-1234🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240);
check!("−1234🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: 45_240);
check!("-12:34🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240);
check!("−12:34🤠", [fix!(TimezoneOffset), lit!("🤠")]; offset: -45_240); // MINUS SIGN (U+2212)
check!("🤠+12:34", [lit!("🤠"), fix!(TimezoneOffset)]; offset: 45_240);
check!("Z", [fix!(TimezoneOffset)]; INVALID);
check!("A", [fix!(TimezoneOffset)]; INVALID);
Expand Down Expand Up @@ -904,6 +927,7 @@ fn test_parse() {
check!("+123", [fix!(TimezoneOffsetColon)]; TOO_SHORT);
check!("+1234", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("-1234", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("−1234", [fix!(TimezoneOffsetColon)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12345", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+123456", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+1234567", [fix!(TimezoneOffsetColon)]; TOO_LONG);
Expand All @@ -920,6 +944,7 @@ fn test_parse() {
check!("+12:3", [fix!(TimezoneOffsetColon)]; TOO_SHORT);
check!("+12:34", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("-12:34", [fix!(TimezoneOffsetColon)]; offset: -45_240);
check!("−12:34", [fix!(TimezoneOffsetColon)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34:", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+12:34:5", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+12:34:56", [fix!(TimezoneOffsetColon)]; TOO_LONG);
Expand All @@ -928,6 +953,8 @@ fn test_parse() {
check!("+12:34:56:78", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+12:3456", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("+1234:56", [fix!(TimezoneOffsetColon)]; TOO_LONG);
check!("−12:34", [fix!(TimezoneOffsetColon)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("−12 : 34", [fix!(TimezoneOffsetColon)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12 :34", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("+12: 34", [fix!(TimezoneOffsetColon)]; offset: 45_240);
check!("+12 34", [fix!(TimezoneOffsetColon)]; offset: 45_240);
Expand Down Expand Up @@ -988,6 +1015,7 @@ fn test_parse() {
check!("+123", [fix!(TimezoneOffsetZ)]; TOO_SHORT);
check!("+1234", [fix!(TimezoneOffsetZ)]; offset: 45_240);
check!("-1234", [fix!(TimezoneOffsetZ)]; offset: -45_240);
check!("−1234", [fix!(TimezoneOffsetZ)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12345", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+123456", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+1234567", [fix!(TimezoneOffsetZ)]; TOO_LONG);
Expand All @@ -1004,6 +1032,7 @@ fn test_parse() {
check!("+12:3", [fix!(TimezoneOffsetZ)]; TOO_SHORT);
check!("+12:34", [fix!(TimezoneOffsetZ)]; offset: 45_240);
check!("-12:34", [fix!(TimezoneOffsetZ)]; offset: -45_240);
check!("−12:34", [fix!(TimezoneOffsetZ)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34:", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+12:34:5", [fix!(TimezoneOffsetZ)]; TOO_LONG);
check!("+12:34:56", [fix!(TimezoneOffsetZ)]; TOO_LONG);
Expand Down Expand Up @@ -1074,6 +1103,7 @@ fn test_parse() {
check!("+123", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT);
check!("+1234", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240);
check!("-1234", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!("−1234", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12345", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+123456", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+1234567", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
Expand All @@ -1090,6 +1120,7 @@ fn test_parse() {
check!("+12:3", [internal_fix!(TimezoneOffsetPermissive)]; TOO_SHORT);
check!("+12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240);
check!("-12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!("−12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12:34:", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+12:34:5", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!("+12:34:56", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
Expand Down Expand Up @@ -1118,6 +1149,7 @@ fn test_parse() {
check!("+12:34 ", [internal_fix!(TimezoneOffsetPermissive)]; TOO_LONG);
check!(" +12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: 45_240);
check!(" -12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240);
check!(" −12:34", [internal_fix!(TimezoneOffsetPermissive)]; offset: -45_240); // MINUS SIGN (U+2212)
check!("+12345", [internal_fix!(TimezoneOffsetPermissive), num!(Day)]; offset: 45_240, day: 5);
check!("+12:345", [internal_fix!(TimezoneOffsetPermissive), num!(Day)]; offset: 45_240, day: 5);
check!("+12:34:", [internal_fix!(TimezoneOffsetPermissive), lit!(":")]; offset: 45_240);
Expand Down Expand Up @@ -1164,6 +1196,16 @@ fn test_parse() {
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2,
minute: 37, second: 5, offset: 32400);
check!("2015-02-04T14:37:05-09:00",
[num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"),
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2,
minute: 37, second: 5, offset: -32400);
check!("2015-02-04T14:37:05−09:00", // timezone offset using MINUS SIGN (U+2212)
[num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"),
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year: 2015, month: 2, day: 4, hour_div_12: 1, hour_mod_12: 2,
minute: 37, second: 5, offset: -32400);
check!("20150204143705567",
[num!(Year), num!(Month), num!(Day),
num!(Hour), num!(Minute), num!(Second), internal_fix!(Nanosecond3NoDot)];
Expand Down Expand Up @@ -1367,15 +1409,21 @@ fn test_rfc3339() {
// Test data - (input, Ok(expected result after parse and format) or Err(error code))
let testdates = [
("2015-01-20T17:35:20-08:00", Ok("2015-01-20T17:35:20-08:00")), // normal case
("2015-01-20T17:35:20−08:00", Ok("2015-01-20T17:35:20-08:00")), // normal case with MINUS SIGN (U+2212)
("1944-06-06T04:04:00Z", Ok("1944-06-06T04:04:00+00:00")), // D-day
("2001-09-11T09:45:00-08:00", Ok("2001-09-11T09:45:00-08:00")),
("2015-01-20T17:35:20.001-08:00", Ok("2015-01-20T17:35:20.001-08:00")),
("2015-01-20T17:35:20.001−08:00", Ok("2015-01-20T17:35:20.001-08:00")), // with MINUS SIGN (U+2212)
("2015-01-20T17:35:20.000031-08:00", Ok("2015-01-20T17:35:20.000031-08:00")),
("2015-01-20T17:35:20.000000004-08:00", Ok("2015-01-20T17:35:20.000000004-08:00")),
("2015-01-20T17:35:20.000000004−08:00", Ok("2015-01-20T17:35:20.000000004-08:00")), // with MINUS SIGN (U+2212)
("2015-01-20T17:35:20.000000000452-08:00", Ok("2015-01-20T17:35:20-08:00")), // too small
("2015-01-20T17:35:20.000000000452−08:00", Ok("2015-01-20T17:35:20-08:00")), // too small with MINUS SIGN (U+2212)
("2015-01-20 17:35:20.001-08:00", Err(INVALID)), // missing separator 'T'
("2015/01/20T17:35:20.001-08:00", Err(INVALID)), // wrong separator char YMD
("2015-01-20T17-35-20.001-08:00", Err(INVALID)), // wrong separator char HMS
("-01-20T17:35:20-08:00", Err(INVALID)), // missing year
("99-01-20T17:35:20-08:00", Err(INVALID)), // bad year format
("99999-01-20T17:35:20-08:00", Err(INVALID)), // bad year value
("-2000-01-20T17:35:20-08:00", Err(INVALID)), // bad year value
("2015-02-30T17:35:20-08:00", Err(OUT_OF_RANGE)), // bad day of month value
Expand Down Expand Up @@ -1404,11 +1452,13 @@ fn test_rfc3339() {
("2015-01-20T17:35:20-08:ZZ", Err(INVALID)), // bad offset minutes
("2015-01-20T17:35:20.001-08 : 00", Err(INVALID)), // bad offset separator
("2015-01-20T17:35:20-08:00:00", Err(TOO_LONG)), // bad offset format
("2015-01-20T17:35:20-08:", Err(TOO_SHORT)), // bad offset format
("2015-01-20T17:35:20-08", Err(TOO_SHORT)), // bad offset format
("2015-01-20T", Err(TOO_SHORT)), // missing HMS
("2015-01-20T00:00:1", Err(TOO_SHORT)), // missing complete S
("2015-01-20T00:00:1-08:00", Err(INVALID)), // missing complete S
("2015-01-20T17:35:20+08:", Err(TOO_SHORT)), // bad offset format
("2015-01-20T17:35:20-08:", Err(TOO_SHORT)), // bad offset format
("2015-01-20T17:35:20−08:", Err(TOO_SHORT)), // bad offset format with MINUS SIGN (U+2212)
("2015-01-20T17:35:20-08", Err(TOO_SHORT)), // bad offset format
("2015-01-20T", Err(TOO_SHORT)), // missing HMS
("2015-01-20T00:00:1", Err(TOO_SHORT)), // missing complete S
("2015-01-20T00:00:1-08:00", Err(INVALID)), // missing complete S
];

fn rfc3339_to_datetime(date: &str) -> ParseResult<DateTime<FixedOffset>> {
Expand Down
32 changes: 29 additions & 3 deletions src/format/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,13 +211,28 @@ pub(super) fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str
where
F: FnMut(&str) -> ParseResult<&str>,
{
timezone_offset_internal(s, consume_colon, false)
timezone_offset_internal(s, consume_colon, false, true)
}

/// Parse a timezone from `s` and return the offset in seconds.
///
/// The `consume_colon` function is used to parse a mandatory or optional `:`
/// separator between hours offset and minutes offset.
///
/// The `allow_missing_minutes` flag allows the timezone minutes offset to be
/// missing from `s`.
///
/// The `allow_tz_minus_sign` flag allows the negative sign of the timezone
/// offset to be `−` MINUS SIGN (U+2212) in addition to the usual
/// ASCII `-` HYPHEN-MINUS (U+002D).
/// This is part of [RFC 3339 & ISO 8601].
///
/// [RFC 3339 & ISO 8601]: https://en.wikipedia.org/w/index.php?title=ISO_8601&oldid=1114309368#Time_offsets_from_UTC
fn timezone_offset_internal<F>(
mut s: &str,
mut consume_colon: F,
allow_missing_minutes: bool,
allow_tz_minus_sign: bool,
) -> ParseResult<(&str, i32)>
where
F: FnMut(&str) -> ParseResult<&str>,
Expand All @@ -232,15 +247,26 @@ where
}
let negative = match s.chars().next() {
Some('+') => {
// PLUS SIGN (U+2B)
s = &s['+'.len_utf8()..];

false
}
Some('-') => {
// HYPHEN-MINUS (U+2D)
s = &s['-'.len_utf8()..];

true
}
Some('−') => {
// MINUS SIGN (U+2212)
if !allow_tz_minus_sign {
return Err(INVALID);
}
s = &s['−'.len_utf8()..];

true
}
Some(_) => return Err(INVALID),
None => return Err(TOO_SHORT),
};
Expand Down Expand Up @@ -309,7 +335,7 @@ where
{
match s.as_bytes().first() {
Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
_ => timezone_offset_internal(s, colon, true),
_ => timezone_offset_internal(s, colon, true, true),
}
}

Expand Down Expand Up @@ -347,7 +373,7 @@ pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)>
Ok((s, None))
}
} else {
let (s_, offset) = timezone_offset(s, |s| Ok(s))?;
let (s_, offset) = timezone_offset_internal(s, |s| Ok(s), false, false)?;
Ok((s_, Some(offset)))
}
}
Expand Down

0 comments on commit 1f3513d

Please sign in to comment.