From 270c45b5a328d09d39f2fe019d380d4488d73a39 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Mon, 11 May 2026 22:31:26 -0400 Subject: [PATCH] fix: make date_part isodow return 1..=7 (Mon=1, Sun=7) per ISO 8601/Postgres --- datafusion/functions/src/datetime/date_part.rs | 15 +++++++++++++-- .../spark/src/function/datetime/date_part.rs | 7 +++++-- .../test_files/datetime/date_part.slt | 8 ++++---- docs/source/user-guide/sql/scalar_functions.md | 2 +- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 10275f5aaed2e..3c405d388bcab 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -80,7 +80,7 @@ use datafusion_macros::user_doc; - dow (day of the week where Sunday is 0) - doy (day of the year) - epoch (seconds since Unix epoch for timestamps/dates, total seconds for intervals) - - isodow (day of the week where Monday is 0) + - isodow (ISO 8601 day of the week where Monday is 1 and Sunday is 7) "# ), argument( @@ -239,7 +239,18 @@ impl ScalarUDFImpl for DatePartFunc { "qtr" | "quarter" => date_part(array.as_ref(), DatePart::Quarter)?, "doy" => date_part(array.as_ref(), DatePart::DayOfYear)?, "dow" => date_part(array.as_ref(), DatePart::DayOfWeekSunday0)?, - "isodow" => date_part(array.as_ref(), DatePart::DayOfWeekMonday0)?, + "isodow" => { + // Postgres `isodow` is 1..=7 with Mon=1. Arrow's + // `DayOfWeekMonday0` returns 0..=6 with Mon=0; shift by + // +1 to match Postgres. TODO: switch to a future + // `DatePart::DayOfWeekMonday1` upstream variant once it + // exists, so this kernel-then-add becomes a single call. 
+ let zero_based = + date_part(array.as_ref(), DatePart::DayOfWeekMonday0)?; + let int_arr = as_int32_array(&zero_based)?; + let one_based: Int32Array = int_arr.unary(|v| v + 1); + Arc::new(one_based) as ArrayRef + } "epoch" => epoch(array.as_ref())?, _ => return exec_err!("Date part '{part}' not supported"), } diff --git a/datafusion/spark/src/function/datetime/date_part.rs b/datafusion/spark/src/function/datetime/date_part.rs index ced4865d164a3..91bdb9a55318b 100644 --- a/datafusion/spark/src/function/datetime/date_part.rs +++ b/datafusion/spark/src/function/datetime/date_part.rs @@ -125,8 +125,11 @@ impl ScalarUDFImpl for SparkDatePart { )); match part { - // Add 1 for day-of-week parts to convert 0-indexed to 1-indexed - "dow" | "isodow" => Ok(ExprSimplifyResult::Simplified( + // Spark's `dayofweek` is 1..=7 (Sun=1) but DataFusion's `dow` is 0..=6 + // (Sun=0); shift by +1. DataFusion's `isodow` already returns the + // Postgres-compatible 1..=7 (Mon=1), which matches Spark's + // `dayofweek_iso`/`dow_iso`, so no shift is needed there. 
+ "dow" => Ok(ExprSimplifyResult::Simplified( date_part_expr + Expr::Literal(ScalarValue::Int32(Some(1)), None), )), _ => Ok(ExprSimplifyResult::Simplified(date_part_expr)), diff --git a/datafusion/sqllogictest/test_files/datetime/date_part.slt b/datafusion/sqllogictest/test_files/datetime/date_part.slt index 07dc1302b9ece..891319f9e2cd2 100644 --- a/datafusion/sqllogictest/test_files/datetime/date_part.slt +++ b/datafusion/sqllogictest/test_files/datetime/date_part.slt @@ -1240,22 +1240,22 @@ true query I SELECT date_part('ISODOW', CAST('2000-01-01' AS DATE)) ---- -5 +6 query I SELECT EXTRACT(isodow FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- -1 +2 query I SELECT EXTRACT("isodow" FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- -1 +2 query I SELECT EXTRACT('isodow' FROM to_timestamp('2020-09-08T12:00:00+00:00')) ---- -1 +2 ## Preimage tests diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 260f69f737d1b..8023e2103ec88 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2573,7 +2573,7 @@ date_part(part, expression) - dow (day of the week where Sunday is 0) - doy (day of the year) - epoch (seconds since Unix epoch for timestamps/dates, total seconds for intervals) - - isodow (day of the week where Monday is 0) + - isodow (ISO 8601 day of the week where Monday is 1 and Sunday is 7) - **expression**: Time expression to operate on. Can be a constant, column, or function.