Skip to content

Commit

Permalink
[fix #5044] Support converting 'yyyymmdd' format to date
Browse files Browse the repository at this point in the history
Signed-off-by: tangruilin <tang.ruilin@foxmail.com>
  • Loading branch information
Tangruilin committed Nov 15, 2023
1 parent 7ba36b0 commit dfe2d05
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 1 deletion.
34 changes: 34 additions & 0 deletions arrow-cast/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4861,6 +4861,40 @@ mod tests {
}
}

/// Casting a `StringArray` to `Date32` must accept the hyphenated
/// `yyyy-mm-dd` form as well as the compact `yymmdd` and `yyyymmdd` forms.
#[test]
fn test_cast_string_format_yyyymmdd_and_yyyymmdd_to_date32() {
    // One input per supported spelling; the two-digit-year inputs are
    // expected to resolve to 2020 and 1999 respectively.
    let inputs = vec![
        Some("2020-12-25"),
        Some("201225"),
        Some("991106"),
        Some("20201117"),
    ];
    let a: ArrayRef = Arc::new(StringArray::from(inputs));

    // `safe: false` makes an unparsable value surface as an error, which the
    // `unwrap` below turns into a test failure.
    let options = CastOptions {
        safe: false,
        format_options: FormatOptions::default(),
    };
    let result = cast_with_options(&a, &DataType::Date32, &options).unwrap();
    let c = result.as_primitive::<Date32Type>();

    // Expected (year, month, day) for each input row, in order.
    let expected = [
        (2020, 12, 25),
        (2020, 12, 25),
        (1999, 11, 6),
        (2020, 11, 17),
    ];
    for (row, &(year, month, day)) in expected.iter().enumerate() {
        assert_eq!(
            chrono::NaiveDate::from_ymd_opt(year, month, day),
            c.value_as_date(row)
        );
    }
}

#[test]
fn test_cast_string_to_time32second() {
let a1 = Arc::new(StringArray::from(vec![
Expand Down
39 changes: 38 additions & 1 deletion arrow-cast/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,18 @@ const EPOCH_DAYS_FROM_CE: i32 = 719_163;
/// Error message if nanosecond conversion request beyond supported interval
const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804";

/// Expands a two-digit year into a full year using a sliding 100-year window
/// centred on the current UTC year.
///
/// The result is the unique year `y` with `y % 100 == input_year` that lies in
/// `[current_year - 49, current_year + 50]`. For example, when the current
/// year is 2023, `20` resolves to 2020 and `99` resolves to 1999.
///
/// `input_year` is expected to be in `0..=99`; values outside that range are
/// not validated here.
///
/// Note that the result depends on the wall clock (`Utc::now()`), so the same
/// input can resolve to a different century as time passes.
fn parse_two_digit_year(input_year: i32) -> i32 {
    let current_year = Utc::now().naive_utc().year();
    // First year of the current century, e.g. 2000 for 2023.
    let century_start = current_year - current_year % 100;
    let candidate = century_start + input_year;

    if candidate > current_year + 50 {
        // More than 50 years ahead: the previous century is the closer match.
        candidate - 100
    } else if candidate < current_year - 49 {
        // More than 49 years behind (only possible in the second half of a
        // century): the next century is the one inside the window.
        candidate + 100
    } else {
        candidate
    }
}

fn parse_date(string: &str) -> Option<NaiveDate> {
if string.len() > 10 {
return None;
Expand All @@ -560,7 +572,32 @@ fn parse_date(string: &str) -> Option<NaiveDate> {
const HYPHEN: u8 = b'-'.wrapping_sub(b'0');

if digits[4] != HYPHEN {
return None;
// In this case, the string may be type "yymmdd" or "yyyymmdd", so we should check it
if string.len() != 6 && string.len() != 8 {
return None;
}
for ch in string.bytes() {
if ch < b'0' || ch > b'9' {
return None;
}
}
let (year, month, day) = match string.len() {
6 => (
parse_two_digit_year(digits[0] as i32 * 10 + digits[1] as i32) as u16,
digits[2] * 10 + digits[3],
digits[4] * 10 + digits[5],
),
8 => (
digits[0] as u16 * 1000
+ digits[1] as u16 * 100
+ digits[2] as u16 * 10
+ digits[3] as u16,
digits[4] * 10 + digits[5],
digits[6] * 10 + digits[7],
),
_ => return None,
};
return NaiveDate::from_ymd_opt(year as _, month as _, day as _);
}

let (month, day) = match mask {
Expand Down

0 comments on commit dfe2d05

Please sign in to comment.