diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 05a1a27070cc4..f724e7fe514a8 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -9541,6 +9541,136 @@ ORDER BY "source"."str0" ASC ) } + #[tokio::test] + async fn test_filter_extract_by_year_and_week() { + init_testing_logger(); + + async fn assert_week_result(week: i32, start_date: &str, end_date: &str) { + let query_plan = convert_select_to_query_plan( + format!(r#" + SELECT COUNT(*) AS "count", + EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") AS "yr:completedAt:ok" + FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce" + WHERE EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") = 2019 + AND EXTRACT(WEEK FROM "KibanaSampleDataEcommerce"."order_date") = {} + GROUP BY 2 + "#, week), + DatabaseProtocol::PostgreSQL, + ).await; + + assert_eq!( + query_plan.as_logical_plan().find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]), + dimensions: Some(vec![]), + segments: Some(vec![]), + time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.order_date".to_string(), + granularity: Some("year".to_string()), + date_range: Some(json!(vec![start_date, end_date])), + },]), + order: Some(vec![]), + ..Default::default() + } + ) + } + + // Test week 1 (first week of 2019) + // In 2019, January 1 is a Tuesday, so ISO week 1 starts on Monday, December 31, 2018 + // But since our range is constrained to 2019, it should be Jan 1-6 + assert_week_result(1, "2019-01-01", "2019-01-06").await; + + // Test week 15 (mid-April) + // Week 15 of 2019 is April 8-14 + assert_week_result(15, "2019-04-08", "2019-04-14").await; + + // Test week 52 (end of year) + // Week 52 of 2019 is December 23-29 + assert_week_result(52, "2019-12-23", "2019-12-29").await; + } + + #[tokio::test] + async fn 
test_filter_extract_by_year_and_week_with_trunc() { + init_testing_logger(); + + let logical_plan = convert_select_to_query_plan( + r#" + SELECT + COUNT(*) AS "count", + EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") AS "yr:completedAt:ok" + FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce" + WHERE EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") = 2019 + AND CAST(TRUNC(EXTRACT(WEEK FROM "KibanaSampleDataEcommerce"."order_date")) AS INTEGER) = 15 + GROUP BY 2 + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await + .as_logical_plan(); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]), + dimensions: Some(vec![]), + segments: Some(vec![]), + time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.order_date".to_string(), + granularity: Some("year".to_string()), + date_range: Some(json!(vec![ + "2019-04-08".to_string(), + "2019-04-14".to_string(), + ])), + },]), + order: Some(vec![]), + ..Default::default() + } + ) + } + + #[tokio::test] + async fn test_filter_date_part_by_year_quarter_month_week() { + init_testing_logger(); + + let logical_plan = convert_select_to_query_plan( + r#" + SELECT + COUNT(*) AS "count", + DATE_PART('year', "KibanaSampleDataEcommerce"."order_date") AS "yr:completedAt:ok" + FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce" + WHERE DATE_PART('year', "KibanaSampleDataEcommerce"."order_date") = 2019 + AND DATE_PART('quarter', "KibanaSampleDataEcommerce"."order_date") = 2 + AND DATE_PART('month', "KibanaSampleDataEcommerce"."order_date") = 4 + AND DATE_PART('week', "KibanaSampleDataEcommerce"."order_date") = 15 + GROUP BY 2 + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await + .as_logical_plan(); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: 
Some(vec!["KibanaSampleDataEcommerce.count".to_string()]), + dimensions: Some(vec![]), + segments: Some(vec![]), + time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.order_date".to_string(), + granularity: Some("year".to_string()), + date_range: Some(json!(vec![ + "2019-04-08".to_string(), + "2019-04-14".to_string(), + ])), + },]), + order: Some(vec![]), + ..Default::default() + } + ) + } + #[tokio::test] async fn test_tableau_filter_extract_by_year() { init_testing_logger(); diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs index 40b6fe9493646..e35a0cffb9c56 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs @@ -1,4 +1,4 @@ -use super::utils; +use super::utils::{self, try_merge_range_with_date_part}; use crate::compile::date_parser::parse_date_str; use crate::{ compile::rewrite::{ @@ -50,7 +50,6 @@ use datafusion::{ }; use egg::{Subst, Var}; use std::{ - cmp::{max, min}, collections::HashSet, fmt::Display, ops::{Index, IndexMut}, @@ -4025,98 +4024,18 @@ impl FilterRules { return false; }; - let new_values = match granularity.as_str() { - "month" => { - // Check that the range only covers one year - let start_date_year = start_date.year(); - if start_date_year != end_date.year() { - return false; - } - - // Month value must be valid - if !(1..=12).contains(&value) { - return false; - } - - // Obtain the new range - let Some(new_start_date) = - NaiveDate::from_ymd_opt(start_date_year, value as u32, 1) - else { - return false; - }; - let Some(new_end_date) = new_start_date - .checked_add_months(Months::new(1)) - .and_then(|date| date.checked_sub_days(Days::new(1))) - else { - return false; - }; - - // If the resulting range is outside of the original range, we can't merge - // the filters - if new_start_date > end_date || new_end_date < start_date { - 
return false; - } - - // Preserves existing constraints, for example: - // inDataRange: order_date >= '2019-02-15' AND order_date < '2019-03-10' - // Month filter: EXTRACT(MONTH FROM order_date) = 2 (February) - let new_start_date = max(new_start_date, start_date); - let new_end_date = min(new_end_date, end_date); - - vec![ - new_start_date.format("%Y-%m-%d").to_string(), - new_end_date.format("%Y-%m-%d").to_string(), - ] - } - "quarter" | "qtr" => { - // Check that the range only covers one year - let start_date_year = start_date.year(); - if start_date_year != end_date.year() { - return false; - } - - // Quarter value must be valid (1-4) - if !(1..=4).contains(&value) { - return false; - } - - let quarter_start_month = (value - 1) * 3 + 1; - - // Obtain the new range - let Some(new_start_date) = - NaiveDate::from_ymd_opt(start_date_year, quarter_start_month as u32, 1) - else { - return false; - }; - - let Some(new_end_date) = new_start_date - .checked_add_months(Months::new(3)) - .and_then(|date| date.checked_sub_days(Days::new(1))) - else { - return false; - }; - - // Paranoid check, If the resulting range is outside of the original range, we can't merge - // the filters - if new_start_date > end_date || new_end_date < start_date { - return false; - } - - // Preserves existing constraints, for example: - // inDataRange: order_date >= '2019-04-15' AND order_date < '2019-12-31' - // Month filter: EXTRACT(QUARTER FROM order_date) = 2 - let new_start_date = max(new_start_date, start_date); - let new_end_date = min(new_end_date, end_date); - - vec![ - new_start_date.format("%Y-%m-%d").to_string(), - new_end_date.format("%Y-%m-%d").to_string(), - ] - } - // TODO: handle more granularities - _ => return false, + // Use the utility function to calculate the date range for the given granularity + let Some((new_start_date, new_end_date)) = + try_merge_range_with_date_part(start_date, end_date, granularity.as_str(), value) + else { + return false; }; + let new_values = 
vec![ + new_start_date.format("%Y-%m-%d").to_string(), + new_end_date.format("%Y-%m-%d").to_string(), + ]; + subst.insert( new_values_var, egraph.add(LogicalPlanLanguage::FilterMemberValues(FilterMemberValues( diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/utils.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/utils.rs index 66eed50dc226d..efd19ffca59d9 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/utils.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/utils.rs @@ -4,7 +4,7 @@ use std::{ sync::Arc, }; -use chrono::{DateTime, Datelike, Timelike, Utc}; +use chrono::{DateTime, Datelike, Days, Months, NaiveDate, Timelike, Utc}; use datafusion::{ arrow::datatypes::{ArrowPrimitiveType, IntervalDayTimeType, IntervalMonthDayNanoType}, error::DataFusionError, @@ -744,3 +744,213 @@ impl DecomposedMonthDayNano { } } } + +/// Try to merge a date range with a date part extraction filter. +/// +/// This function calculates the date range for a specific date part (month, quarter, week) +/// within the given year, constrained by the provided start and end dates. 
+///
+/// Week numbers follow ISO 8601 and therefore do not line up with calendar years: the first
+/// days of January can still carry week 52/53 of the previous ISO year, and the last days of
+/// December can already carry week 1 of the next one. Every stretch of the range that carries
+/// the requested week number is taken into account.
+///
+/// Returns `None` when the filters cannot be merged into a single date range:
+/// * the range spans more than one calendar year (or is inverted),
+/// * the granularity is not supported,
+/// * `value` is out of bounds for the granularity,
+/// * no day of the range matches the date part,
+/// * the matching days do not form one contiguous range (e.g. week 1 matching both the
+///   beginning of January and the end of December).
+pub fn try_merge_range_with_date_part(
+    start_date: NaiveDate,
+    end_date: NaiveDate,
+    granularity: &str,
+    value: i64,
+) -> Option<(NaiveDate, NaiveDate)> {
+    // The merge is only defined for a range that stays inside one calendar year
+    let year = start_date.year();
+    if year != end_date.year() {
+        return None;
+    }
+
+    match granularity {
+        "month" => {
+            // Month value must be valid
+            let month = u32::try_from(value)
+                .ok()
+                .filter(|month| (1..=12).contains(month))?;
+
+            let month_start = NaiveDate::from_ymd_opt(year, month, 1)?;
+            let month_end = month_start
+                .checked_add_months(Months::new(1))
+                .and_then(|date| date.checked_sub_days(Days::new(1)))?;
+
+            // Preserves existing constraints, for example:
+            // inDateRange: order_date >= '2019-02-15' AND order_date < '2019-03-10'
+            // filter: EXTRACT(MONTH FROM order_date) = 2 (February)
+            clamp_to_range(month_start, month_end, start_date, end_date)
+        }
+        "quarter" | "qtr" => {
+            // Quarter value must be valid
+            let quarter = u32::try_from(value)
+                .ok()
+                .filter(|quarter| (1..=4).contains(quarter))?;
+
+            let quarter_start = NaiveDate::from_ymd_opt(year, (quarter - 1) * 3 + 1, 1)?;
+            let quarter_end = quarter_start
+                .checked_add_months(Months::new(3))
+                .and_then(|date| date.checked_sub_days(Days::new(1)))?;
+
+            // Preserves existing constraints, for example:
+            // inDateRange: order_date >= '2019-04-15' AND order_date < '2019-12-31'
+            // filter: EXTRACT(QUARTER FROM order_date) = 2
+            clamp_to_range(quarter_start, quarter_end, start_date, end_date)
+        }
+        // Following ISO 8601
+        "week" => {
+            // Week value must be valid
+            let week = u32::try_from(value)
+                .ok()
+                .filter(|week| (1..=53).contains(week))?;
+
+            // Days of calendar year `year` can belong to three ISO years: the previous one
+            // (first days of January), `year` itself, and the next one (last days of
+            // December). The same week number may thus show up at both ends of the range.
+            let mut merged = None;
+            for iso_year in [year - 1, year, year + 1] {
+                let Some((week_start, week_end)) = iso_week_bounds(iso_year, week) else {
+                    continue;
+                };
+
+                // Preserves existing constraints, for example:
+                // inDateRange: order_date >= '2019-04-09' AND order_date <= '2019-04-12'
+                // filter: EXTRACT(WEEK FROM date) = 15
+                let Some(overlap) = clamp_to_range(week_start, week_end, start_date, end_date)
+                else {
+                    continue;
+                };
+
+                if merged.is_some() {
+                    // Two disjoint stretches of the range share this week number,
+                    // a single date range can't express that
+                    return None;
+                }
+                merged = Some(overlap);
+            }
+
+            merged
+        }
+        // TODO: handle more granularities
+        _ => None,
+    }
+}
+
+/// Intersects the `[part_start, part_end]` range with `[range_start, range_end]`.
+///
+/// Returns `None` when the two ranges have no day in common.
+fn clamp_to_range(
+    part_start: NaiveDate,
+    part_end: NaiveDate,
+    range_start: NaiveDate,
+    range_end: NaiveDate,
+) -> Option<(NaiveDate, NaiveDate)> {
+    let clamped_start = part_start.max(range_start);
+    let clamped_end = part_end.min(range_end);
+
+    (clamped_start <= clamped_end).then_some((clamped_start, clamped_end))
+}
+
+/// Returns Monday and Sunday of ISO 8601 week `week` of ISO year `iso_year`.
+///
+/// Returns `None` when that week does not exist, e.g. week 53 of an ISO year that
+/// only has 52 weeks.
+fn iso_week_bounds(iso_year: i32, week: u32) -> Option<(NaiveDate, NaiveDate)> {
+    // January 4th always belongs to ISO week 1 of its own year
+    let jan_4 = NaiveDate::from_ymd_opt(iso_year, 1, 4)?;
+    let days_since_monday = u64::from(jan_4.weekday().num_days_from_monday());
+    let week_1_monday = jan_4.checked_sub_days(Days::new(days_since_monday))?;
+
+    let days_from_week_1 = u64::from(week.checked_sub(1)?) * 7;
+    let week_start = week_1_monday.checked_add_days(Days::new(days_from_week_1))?;
+    let week_end = week_start.checked_add_days(Days::new(6))?;
+
+    // When the week doesn't exist, the calculated Monday already belongs
+    // to week 1 of the next ISO year
+    let iso_week = week_start.iso_week();
+    (iso_week.year() == iso_year && iso_week.week() == week).then_some((week_start, week_end))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn date(year: i32, month: u32, day: u32) -> NaiveDate {
+        NaiveDate::from_ymd_opt(year, month, day).expect("Invalid date")
+    }
+
+    #[test]
+    fn test_calculate_month_and_quarter_range() {
+        let start = date(2019, 1, 1);
+        let end = date(2019, 12, 31);
+
+        assert_eq!(
+            try_merge_range_with_date_part(start, end, "month", 2),
+            Some((date(2019, 2, 1), date(2019, 2, 28)))
+        );
+        assert_eq!(
+            try_merge_range_with_date_part(start, end, "quarter", 2),
+            Some((date(2019, 4, 1), date(2019, 6, 30)))
+        );
+        assert_eq!(
+            try_merge_range_with_date_part(start, end, "qtr", 4),
+            Some((date(2019, 10, 1), date(2019, 12, 31)))
+        );
+
+        // Test invalid values
+        assert_eq!(try_merge_range_with_date_part(start, end, "month", 0), None);
+        assert_eq!(try_merge_range_with_date_part(start, end, "month", 13), None);
+        assert_eq!(try_merge_range_with_date_part(start, end, "quarter", 5), None);
+
+        // Test unsupported granularity
+        assert_eq!(try_merge_range_with_date_part(start, end, "day", 1), None);
+
+        // Test partial overlap
+        assert_eq!(
+            try_merge_range_with_date_part(date(2019, 2, 15), date(2019, 3, 10), "month", 2),
+            Some((date(2019, 2, 15), date(2019, 2, 28)))
+        );
+
+        // Test range spanning two calendar years
+        assert_eq!(
+            try_merge_range_with_date_part(date(2018, 12, 1), date(2019, 1, 31), "month", 1),
+            None
+        );
+    }
+
+    #[test]
+    fn test_calculate_week_range() {
+        let start = date(2019, 1, 1);
+        let end = date(2019, 12, 31);
+
+        // Test week 2 of 2019 (Jan 7-13)
+        assert_eq!(
+            try_merge_range_with_date_part(start, end, "week", 2),
+            Some((date(2019, 1, 7), date(2019, 1, 13)))
+        );
+
+        // Test week 15 of 2019 (Apr 8-14)
+        assert_eq!(
+            try_merge_range_with_date_part(start, end, "week", 15),
+            Some((date(2019, 4, 8), date(2019, 4, 14)))
+        );
+
+        // Test week 52 of 2019 (Dec 23-29)
+        assert_eq!(
+            try_merge_range_with_date_part(start, end, "week", 52),
+            Some((date(2019, 12, 23), date(2019, 12, 29)))
+        );
+
+        // Test invalid week number
+        assert_eq!(try_merge_range_with_date_part(start, end, "week", -1), None);
+        assert_eq!(try_merge_range_with_date_part(start, end, "week", 0), None);
+        assert_eq!(try_merge_range_with_date_part(start, end, "week", 54), None);
+
+        // Test week 53 (no day of 2019 belongs to a week 53)
+        assert_eq!(try_merge_range_with_date_part(start, end, "week", 53), None);
+
+        // Test partial overlap
+        assert_eq!(
+            try_merge_range_with_date_part(date(2019, 4, 10), date(2019, 4, 12), "week", 15),
+            Some((date(2019, 4, 10), date(2019, 4, 12)))
+        );
+
+        // Test no overlap
+        assert_eq!(
+            try_merge_range_with_date_part(date(2019, 5, 1), date(2019, 12, 31), "week", 15),
+            None
+        );
+    }
+
+    #[test]
+    fn test_calculate_week_range_at_year_boundaries() {
+        // Week 1 of 2019 is Dec 31, 2018 - Jan 6, 2019, but Dec 30-31, 2019 belong to
+        // week 1 of 2020: the whole year matches two disjoint stretches
+        assert_eq!(
+            try_merge_range_with_date_part(date(2019, 1, 1), date(2019, 12, 31), "week", 1),
+            None
+        );
+
+        // Only one of the stretches is inside the range
+        assert_eq!(
+            try_merge_range_with_date_part(date(2019, 1, 1), date(2019, 6, 30), "week", 1),
+            Some((date(2019, 1, 1), date(2019, 1, 6)))
+        );
+        assert_eq!(
+            try_merge_range_with_date_part(date(2019, 7, 1), date(2019, 12, 31), "week", 1),
+            Some((date(2019, 12, 30), date(2019, 12, 31)))
+        );
+
+        // Jan 1-3, 2021 belong to week 53 of 2020, and 2021 has no week 53 of its own
+        assert_eq!(
+            try_merge_range_with_date_part(date(2021, 1, 1), date(2021, 12, 31), "week", 53),
+            Some((date(2021, 1, 1), date(2021, 1, 3)))
+        );
+
+        // Jan 1-2, 2022 belong to week 52 of 2021, Dec 26-31, 2022 to week 52 of 2022
+        assert_eq!(
+            try_merge_range_with_date_part(date(2022, 1, 1), date(2022, 12, 31), "week", 52),
+            None
+        );
+    }
+}