From dfe2d0555cd4b77f5349371ae1bfaa2340a6370e Mon Sep 17 00:00:00 2001
From: tangruilin
Date: Tue, 14 Nov 2023 20:48:33 +0800
Subject: [PATCH] [fix #5044] Support converting 'yyyymmdd' format to date

Signed-off-by: tangruilin
---
 arrow-cast/src/cast.rs  | 34 ++++++++++++++++++++++++++++++++++
 arrow-cast/src/parse.rs | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs
index e44133f81b4..c9c7ebe6208 100644
--- a/arrow-cast/src/cast.rs
+++ b/arrow-cast/src/cast.rs
@@ -4861,6 +4861,40 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_cast_string_format_yymmdd_and_yyyymmdd_to_date32() {
+        let a = Arc::new(StringArray::from(vec![
+            Some("2020-12-25"),
+            Some("201225"),
+            Some("991106"),
+            Some("20201117"),
+        ])) as ArrayRef;
+
+        let to_type = DataType::Date32;
+        let options = CastOptions {
+            safe: false,
+            format_options: FormatOptions::default(),
+        };
+        let result = cast_with_options(&a, &to_type, &options).unwrap();
+        let c = result.as_primitive::<Date32Type>();
+        assert_eq!(
+            chrono::NaiveDate::from_ymd_opt(2020, 12, 25),
+            c.value_as_date(0)
+        );
+        assert_eq!(
+            chrono::NaiveDate::from_ymd_opt(2020, 12, 25),
+            c.value_as_date(1)
+        );
+        assert_eq!(
+            chrono::NaiveDate::from_ymd_opt(1999, 11, 6),
+            c.value_as_date(2)
+        );
+        assert_eq!(
+            chrono::NaiveDate::from_ymd_opt(2020, 11, 17),
+            c.value_as_date(3)
+        );
+    }
+
     #[test]
     fn test_cast_string_to_time32second() {
         let a1 = Arc::new(StringArray::from(vec![
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index f01b2b4c0d6..5ac94264e95 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -544,6 +544,25 @@ const EPOCH_DAYS_FROM_CE: i32 = 719_163;
 /// Error message if nanosecond conversion request beyond supported interval
 const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";
 
+/// Expands a two-digit year into a four-digit year using a sliding window:
+/// inputs no greater than the current two-digit UTC year plus fifty land in
+/// the current century, larger inputs land in the previous century.
+///
+/// NOTE(review): the result depends on the wall clock (`Utc::now()`), so the
+/// same input can parse to a different date when run in a different year;
+/// consider a fixed pivot before merging.
+fn parse_two_digit_year(input_year: i32) -> i32 {
+    let current_year = Utc::now().naive_utc().year();
+    let threshold = current_year % 100 + 50;
+    let year_prefix = current_year / 100;
+
+    if input_year <= threshold {
+        year_prefix * 100 + input_year
+    } else {
+        (year_prefix - 1) * 100 + input_year
+    }
+}
+
 fn parse_date(string: &str) -> Option<NaiveDate> {
     if string.len() > 10 {
         return None;
@@ -560,7 +579,32 @@ fn parse_date(string: &str) -> Option<NaiveDate> {
     const HYPHEN: u8 = b'-'.wrapping_sub(b'0');
 
     if digits[4] != HYPHEN {
-        return None;
+        // Not hyphen-separated, so the string may be compact "yymmdd" or "yyyymmdd"; validate it
+        if string.len() != 6 && string.len() != 8 {
+            return None;
+        }
+        for ch in string.bytes() {
+            if !ch.is_ascii_digit() {
+                return None;
+            }
+        }
+        let (year, month, day) = match string.len() {
+            6 => (
+                parse_two_digit_year(digits[0] as i32 * 10 + digits[1] as i32) as u16,
+                digits[2] * 10 + digits[3],
+                digits[4] * 10 + digits[5],
+            ),
+            8 => (
+                digits[0] as u16 * 1000
+                    + digits[1] as u16 * 100
+                    + digits[2] as u16 * 10
+                    + digits[3] as u16,
+                digits[4] * 10 + digits[5],
+                digits[6] * 10 + digits[7],
+            ),
+            _ => return None,
+        };
+        return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
     }
 
     let (month, day) = match mask {