diff --git a/Cargo.lock b/Cargo.lock index f500265108ff..d712eecfcc72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1832,7 +1832,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-schema", @@ -1904,7 +1904,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion", @@ -1929,7 +1929,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -1952,7 +1952,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -1975,7 +1975,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2007,7 +2007,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -2034,7 +2034,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "51.0.0" dependencies = [ "futures", "log", @@ -2043,7 +2043,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-compression", @@ -2078,7 +2078,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -2101,7 +2101,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "50.3.0" +version = "51.0.0" dependencies = [ "apache-avro", "arrow", @@ -2120,7 +2120,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2141,7 +2141,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" 
-version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2161,7 +2161,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2190,11 +2190,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "51.0.0" [[package]] name = "datafusion-examples" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2228,7 +2228,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2249,7 +2249,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2273,7 +2273,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2284,7 +2284,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "50.3.0" +version = "51.0.0" dependencies = [ "abi_stable", "arrow", @@ -2306,7 +2306,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2338,7 +2338,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2359,7 +2359,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2372,7 +2372,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2395,7 +2395,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2409,7 
+2409,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2425,7 +2425,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2433,7 +2433,7 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "51.0.0" dependencies = [ "datafusion-doc", "quote", @@ -2442,7 +2442,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2469,7 +2469,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2494,7 +2494,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2507,7 +2507,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2519,7 +2519,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2539,7 +2539,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2575,7 +2575,7 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "chrono", @@ -2611,7 +2611,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2623,7 +2623,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = 
"51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2641,7 +2641,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "51.0.0" dependencies = [ "async-trait", "datafusion-common", @@ -2653,7 +2653,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2673,7 +2673,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2699,7 +2699,7 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2733,7 +2733,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "50.3.0" +version = "51.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2755,7 +2755,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "50.3.0" +version = "51.0.0" dependencies = [ "chrono", "console_error_panic_hook", diff --git a/Cargo.toml b/Cargo.toml index f15929b4c2b0..36198430e40b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.88.0" # Define DataFusion version -version = "50.3.0" +version = "51.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -111,43 +111,43 @@ chrono = { version = "0.4.42", default-features = false } criterion = "0.7" ctor = "0.6.1" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "50.3.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "50.3.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "50.3.0" } -datafusion-common = { path = "datafusion/common", version = "50.3.0", default-features = false } 
-datafusion-common-runtime = { path = "datafusion/common-runtime", version = "50.3.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "50.3.0", default-features = false } -datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "50.3.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "50.3.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "50.3.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "50.3.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "50.3.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "50.3.0" } -datafusion-execution = { path = "datafusion/execution", version = "50.3.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "50.3.0", default-features = false } -datafusion-expr-common = { path = "datafusion/expr-common", version = "50.3.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "50.3.0" } -datafusion-functions = { path = "datafusion/functions", version = "50.3.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "50.3.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "50.3.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "50.3.0", default-features = false } -datafusion-functions-table = { path = "datafusion/functions-table", version = "50.3.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "50.3.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "50.3.0" } -datafusion-macros = { path = "datafusion/macros", version = "50.3.0" } -datafusion-optimizer = { path = 
"datafusion/optimizer", version = "50.3.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "50.3.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "50.3.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "50.3.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "50.3.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "50.3.0" } -datafusion-proto = { path = "datafusion/proto", version = "50.3.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "50.3.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "50.3.0" } -datafusion-session = { path = "datafusion/session", version = "50.3.0" } -datafusion-spark = { path = "datafusion/spark", version = "50.3.0" } -datafusion-sql = { path = "datafusion/sql", version = "50.3.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "50.3.0" } +datafusion = { path = "datafusion/core", version = "51.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "51.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "51.0.0" } +datafusion-common = { path = "datafusion/common", version = "51.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "51.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "51.0.0", default-features = false } +datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "51.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "51.0.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = 
"51.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "51.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "51.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "51.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "51.0.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "51.0.0", default-features = false } +datafusion-expr-common = { path = "datafusion/expr-common", version = "51.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "51.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "51.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "51.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "51.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "51.0.0", default-features = false } +datafusion-functions-table = { path = "datafusion/functions-table", version = "51.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "51.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "51.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "51.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "51.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "51.0.0", default-features = false } +datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "51.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "51.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", 
version = "51.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "51.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "51.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "51.0.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "51.0.0" } +datafusion-session = { path = "datafusion/session", version = "51.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "51.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "51.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "51.0.0" } doc-comment = "0.3" env_logger = "0.11" diff --git a/datafusion-testing b/datafusion-testing index eccb0e4a4263..8ad3ac00c199 160000 --- a/datafusion-testing +++ b/datafusion-testing @@ -1 +1 @@ -Subproject commit eccb0e4a426344ef3faf534cd60e02e9c3afd3ac +Subproject commit 8ad3ac00c1990d44a99fb6738d7e444f0ccf76a0 diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 6266a7184cf5..7f11899af6b6 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -477,9 +477,6 @@ fn criterion_benchmark(c: &mut Criterion) { }; let raw_tpcds_sql_queries = (1..100) - // skip query 75 until it is fixed - // https://github.com/apache/datafusion/issues/17801 - .filter(|q| *q != 75) .map(|q| std::fs::read_to_string(format!("{tests_path}tpc-ds/{q}.sql")).unwrap()) .collect::>(); diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 98804e424b40..aa378d42622d 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -52,8 +52,8 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::config::{CsvOptions, JsonOptions}; use datafusion_common::{ exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, - Column, DFSchema, DataFusionError, ParamValues, ScalarValue, 
SchemaError, - TableReference, UnnestOptions, + unqualified_field_not_found, Column, DFSchema, DataFusionError, ParamValues, + ScalarValue, SchemaError, TableReference, UnnestOptions, }; use datafusion_expr::select_expr::SelectExpr; use datafusion_expr::{ @@ -310,11 +310,20 @@ impl DataFrame { pub fn select_columns(self, columns: &[&str]) -> Result { let fields = columns .iter() - .flat_map(|name| { - self.plan + .map(|name| { + let fields = self + .plan .schema() - .qualified_fields_with_unqualified_name(name) + .qualified_fields_with_unqualified_name(name); + if fields.is_empty() { + Err(unqualified_field_not_found(name, self.plan.schema())) + } else { + Ok(fields) + } }) + .collect::, _>>()? + .into_iter() + .flatten() .collect::>(); let expr: Vec = fields .into_iter() @@ -1655,7 +1664,7 @@ impl DataFrame { pub fn into_view(self) -> Arc { Arc::new(DataFrameTableProvider { plan: self.plan, - table_type: TableType::Temporary, + table_type: TableType::View, }) } diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index 265862ff9af8..e7f26036042f 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -274,33 +274,6 @@ async fn test_nvl2() -> Result<()> { Ok(()) } - -#[tokio::test] -async fn test_nvl2_short_circuit() -> Result<()> { - let expr = nvl2( - col("a"), - arrow_cast(lit("1"), lit("Int32")), - arrow_cast(col("a"), lit("Int32")), - ); - - let batches = get_batches(expr).await?; - - assert_snapshot!( - batches_to_string(&batches), - @r#" - +-----------------------------------------------------------------------------------+ - | nvl2(test.a,arrow_cast(Utf8("1"),Utf8("Int32")),arrow_cast(test.a,Utf8("Int32"))) | - +-----------------------------------------------------------------------------------+ - | 1 | - | 1 | - | 1 | - | 1 | - 
+-----------------------------------------------------------------------------------+ - "# - ); - - Ok(()) -} #[tokio::test] async fn test_fn_arrow_typeof() -> Result<()> { let expr = arrow_typeof(col("l")); diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 05f5a204c096..4d52345a2adc 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -67,7 +67,7 @@ use datafusion_catalog::TableProvider; use datafusion_common::test_util::{batches_to_sort_string, batches_to_string}; use datafusion_common::{ assert_contains, internal_datafusion_err, Constraint, Constraints, DFSchema, - DataFusionError, ScalarValue, TableReference, UnnestOptions, + DataFusionError, ScalarValue, SchemaError, TableReference, UnnestOptions, }; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::file_format::format_as_file_type; @@ -305,6 +305,27 @@ async fn select_columns() -> Result<()> { Ok(()) } +#[tokio::test] +async fn select_columns_with_nonexistent_columns() -> Result<()> { + let t = test_table().await?; + let t2 = t.select_columns(&["canada", "c2", "rocks"]); + + match t2 { + Err(DataFusionError::SchemaError(boxed_err, _)) => { + // Verify it's the first invalid column + match boxed_err.as_ref() { + SchemaError::FieldNotFound { field, .. 
} => { + assert_eq!(field.name(), "canada"); + } + _ => panic!("Expected SchemaError::FieldNotFound for 'canada'"), + } + } + _ => panic!("Expected SchemaError"), + } + + Ok(()) +} + #[tokio::test] async fn select_expr() -> Result<()> { // build plan using Table API @@ -1627,7 +1648,9 @@ async fn register_table() -> Result<()> { let df_impl = DataFrame::new(ctx.state(), df.logical_plan().clone()); // register a dataframe as a table - ctx.register_table("test_table", df_impl.clone().into_view())?; + let table_provider = df_impl.clone().into_view(); + assert_eq!(table_provider.table_type(), TableType::View); + ctx.register_table("test_table", table_provider)?; // pull the table out let table = ctx.table("test_table").await?; diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index 84e644480a4f..4aee274de908 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -320,26 +320,6 @@ async fn test_create_physical_expr() { create_simplified_expr_test(lit(1i32) + lit(2i32), "3"); } -#[test] -fn test_create_physical_expr_nvl2() { - let batch = &TEST_BATCH; - let df_schema = DFSchema::try_from(batch.schema()).unwrap(); - let ctx = SessionContext::new(); - - let expect_err = |expr| { - let physical_expr = ctx.create_physical_expr(expr, &df_schema).unwrap(); - let err = physical_expr.evaluate(batch).unwrap_err(); - assert!( - err.to_string() - .contains("nvl2 should have been simplified to case"), - "unexpected error: {err:?}" - ); - }; - - expect_err(nvl2(col("i"), lit(1i64), lit(0i64))); - expect_err(nvl2(lit(1i64), col("i"), lit(0i64))); -} - #[tokio::test] async fn test_create_physical_expr_coercion() { // create_physical_expr does apply type coercion and unwrapping in cast diff --git a/datafusion/core/tests/tpcds_planning.rs b/datafusion/core/tests/tpcds_planning.rs index 252d76d0f9d9..00e1b8724dbe 100644 --- a/datafusion/core/tests/tpcds_planning.rs +++ 
b/datafusion/core/tests/tpcds_planning.rs @@ -1051,10 +1051,13 @@ async fn regression_test(query_no: u8, create_physical: bool) -> Result<()> { for sql in &sql { let df = ctx.sql(sql).await?; - let (state, plan) = df.into_parts(); - let plan = state.optimize(&plan)?; - if create_physical { - let _ = state.create_physical_plan(&plan).await?; + // attempt to mimic planning steps + if !create_physical { + let (state, plan) = df.into_parts(); + let _ = state.optimize(&plan)?; + } else { + // this is what df.execute() does internally + let _ = df.create_physical_plan().await?; } } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index fd54bb13a62f..c1a55bcfd4f0 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -252,21 +252,7 @@ impl ScalarUDF { Ok(result) } - /// Determines which of the arguments passed to this function are evaluated eagerly - /// and which may be evaluated lazily. - /// - /// See [ScalarUDFImpl::conditional_arguments] for more information. - pub fn conditional_arguments<'a>( - &self, - args: &'a [Expr], - ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { - self.inner.conditional_arguments(args) - } - - /// Returns true if some of this `exprs` subexpressions may not be evaluated - /// and thus any side effects (like divide by zero) may not be encountered. - /// - /// See [ScalarUDFImpl::short_circuits] for more information. + /// Get the circuits of inner implementation pub fn short_circuits(&self) -> bool { self.inner.short_circuits() } @@ -696,42 +682,10 @@ pub trait ScalarUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// /// Setting this to true prevents certain optimizations such as common /// subexpression elimination - /// - /// When overriding this function to return `true`, [ScalarUDFImpl::conditional_arguments] can also be - /// overridden to report more accurately which arguments are eagerly evaluated and which ones - /// lazily. 
fn short_circuits(&self) -> bool { false } - /// Determines which of the arguments passed to this function are evaluated eagerly - /// and which may be evaluated lazily. - /// - /// If this function returns `None`, all arguments are eagerly evaluated. - /// Returning `None` is a micro optimization that saves a needless `Vec` - /// allocation. - /// - /// If the function returns `Some`, returns (`eager`, `lazy`) where `eager` - /// are the arguments that are always evaluated, and `lazy` are the - /// arguments that may be evaluated lazily (i.e. may not be evaluated at all - /// in some cases). - /// - /// Implementations must ensure that the two returned `Vec`s are disjunct, - /// and that each argument from `args` is present in one the two `Vec`s. - /// - /// When overriding this function, [ScalarUDFImpl::short_circuits] must - /// be overridden to return `true`. - fn conditional_arguments<'a>( - &self, - args: &'a [Expr], - ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { - if self.short_circuits() { - Some((vec![], args.iter().collect())) - } else { - None - } - } - /// Computes the output [`Interval`] for a [`ScalarUDFImpl`], given the input /// intervals. /// @@ -921,13 +875,6 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.simplify(args, info) } - fn conditional_arguments<'a>( - &self, - args: &'a [Expr], - ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { - self.inner.conditional_arguments(args) - } - fn short_circuits(&self) -> bool { self.inner.short_circuits() } diff --git a/datafusion/ffi/src/catalog_provider.rs b/datafusion/ffi/src/catalog_provider.rs index 65dcab34f17d..d279951783b4 100644 --- a/datafusion/ffi/src/catalog_provider.rs +++ b/datafusion/ffi/src/catalog_provider.rs @@ -204,7 +204,7 @@ impl FFI_CatalogProvider { /// defined on this struct must only use the stable functions provided in /// FFI_CatalogProvider to interact with the foreign table provider. 
#[derive(Debug)] -pub struct ForeignCatalogProvider(FFI_CatalogProvider); +pub struct ForeignCatalogProvider(pub(crate) FFI_CatalogProvider); unsafe impl Send for ForeignCatalogProvider {} unsafe impl Sync for ForeignCatalogProvider {} diff --git a/datafusion/ffi/src/catalog_provider_list.rs b/datafusion/ffi/src/catalog_provider_list.rs new file mode 100644 index 000000000000..b09f06d318c1 --- /dev/null +++ b/datafusion/ffi/src/catalog_provider_list.rs @@ -0,0 +1,283 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{any::Any, ffi::c_void, sync::Arc}; + +use abi_stable::{ + std_types::{ROption, RString, RVec}, + StableAbi, +}; +use datafusion::catalog::{CatalogProvider, CatalogProviderList}; +use tokio::runtime::Handle; + +use crate::catalog_provider::{FFI_CatalogProvider, ForeignCatalogProvider}; + +/// A stable struct for sharing [`CatalogProviderList`] across FFI boundaries. 
+#[repr(C)] +#[derive(Debug, StableAbi)] +#[allow(non_camel_case_types)] +pub struct FFI_CatalogProviderList { + /// Register a catalog + pub register_catalog: unsafe extern "C" fn( + &Self, + name: RString, + catalog: &FFI_CatalogProvider, + ) -> ROption, + + /// List of existing catalogs + pub catalog_names: unsafe extern "C" fn(&Self) -> RVec, + + /// Access a catalog + pub catalog: + unsafe extern "C" fn(&Self, name: RString) -> ROption, + + /// Used to create a clone on the provider. This should only need to be called + /// by the receiver of the plan. + pub clone: unsafe extern "C" fn(plan: &Self) -> Self, + + /// Release the memory of the private data when it is no longer being used. + pub release: unsafe extern "C" fn(arg: &mut Self), + + /// Return the major DataFusion version number of this provider. + pub version: unsafe extern "C" fn() -> u64, + + /// Internal data. This is only to be accessed by the provider of the plan. + /// A [`ForeignCatalogProviderList`] should never attempt to access this data. 
+ pub private_data: *mut c_void, +} + +unsafe impl Send for FFI_CatalogProviderList {} +unsafe impl Sync for FFI_CatalogProviderList {} + +struct ProviderPrivateData { + provider: Arc, + runtime: Option, +} + +impl FFI_CatalogProviderList { + unsafe fn inner(&self) -> &Arc { + let private_data = self.private_data as *const ProviderPrivateData; + &(*private_data).provider + } + + unsafe fn runtime(&self) -> Option { + let private_data = self.private_data as *const ProviderPrivateData; + (*private_data).runtime.clone() + } +} + +unsafe extern "C" fn catalog_names_fn_wrapper( + provider: &FFI_CatalogProviderList, +) -> RVec { + let names = provider.inner().catalog_names(); + names.into_iter().map(|s| s.into()).collect() +} + +unsafe extern "C" fn register_catalog_fn_wrapper( + provider: &FFI_CatalogProviderList, + name: RString, + catalog: &FFI_CatalogProvider, +) -> ROption { + let runtime = provider.runtime(); + let provider = provider.inner(); + let catalog = Arc::new(ForeignCatalogProvider::from(catalog)); + + provider + .register_catalog(name.into(), catalog) + .map(|catalog| FFI_CatalogProvider::new(catalog, runtime)) + .into() +} + +unsafe extern "C" fn catalog_fn_wrapper( + provider: &FFI_CatalogProviderList, + name: RString, +) -> ROption { + let runtime = provider.runtime(); + let provider = provider.inner(); + provider + .catalog(name.as_str()) + .map(|catalog| FFI_CatalogProvider::new(catalog, runtime)) + .into() +} + +unsafe extern "C" fn release_fn_wrapper(provider: &mut FFI_CatalogProviderList) { + let private_data = Box::from_raw(provider.private_data as *mut ProviderPrivateData); + drop(private_data); +} + +unsafe extern "C" fn clone_fn_wrapper( + provider: &FFI_CatalogProviderList, +) -> FFI_CatalogProviderList { + let old_private_data = provider.private_data as *const ProviderPrivateData; + let runtime = (*old_private_data).runtime.clone(); + + let private_data = Box::into_raw(Box::new(ProviderPrivateData { + provider: 
Arc::clone(&(*old_private_data).provider), + runtime, + })) as *mut c_void; + + FFI_CatalogProviderList { + register_catalog: register_catalog_fn_wrapper, + catalog_names: catalog_names_fn_wrapper, + catalog: catalog_fn_wrapper, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + version: super::version, + private_data, + } +} + +impl Drop for FFI_CatalogProviderList { + fn drop(&mut self) { + unsafe { (self.release)(self) } + } +} + +impl FFI_CatalogProviderList { + /// Creates a new [`FFI_CatalogProviderList`]. + pub fn new( + provider: Arc, + runtime: Option, + ) -> Self { + let private_data = Box::new(ProviderPrivateData { provider, runtime }); + + Self { + register_catalog: register_catalog_fn_wrapper, + catalog_names: catalog_names_fn_wrapper, + catalog: catalog_fn_wrapper, + clone: clone_fn_wrapper, + release: release_fn_wrapper, + version: super::version, + private_data: Box::into_raw(private_data) as *mut c_void, + } + } +} + +/// This wrapper struct exists on the receiver side of the FFI interface, so it has +/// no guarantees about being able to access the data in `private_data`. Any functions +/// defined on this struct must only use the stable functions provided in +/// FFI_CatalogProviderList to interact with the foreign catalog provider list. 
+#[derive(Debug)] +pub struct ForeignCatalogProviderList(FFI_CatalogProviderList); + +unsafe impl Send for ForeignCatalogProviderList {} +unsafe impl Sync for ForeignCatalogProviderList {} + +impl From<&FFI_CatalogProviderList> for ForeignCatalogProviderList { + fn from(provider: &FFI_CatalogProviderList) -> Self { + Self(provider.clone()) + } +} + +impl Clone for FFI_CatalogProviderList { + fn clone(&self) -> Self { + unsafe { (self.clone)(self) } + } +} + +impl CatalogProviderList for ForeignCatalogProviderList { + fn as_any(&self) -> &dyn Any { + self + } + + fn register_catalog( + &self, + name: String, + catalog: Arc, + ) -> Option> { + unsafe { + let catalog = match catalog.as_any().downcast_ref::() + { + Some(s) => &s.0, + None => &FFI_CatalogProvider::new(catalog, None), + }; + + (self.0.register_catalog)(&self.0, name.into(), catalog) + .map(|s| Arc::new(ForeignCatalogProvider(s)) as Arc) + .into() + } + } + + fn catalog_names(&self) -> Vec { + unsafe { + (self.0.catalog_names)(&self.0) + .into_iter() + .map(Into::into) + .collect() + } + } + + fn catalog(&self, name: &str) -> Option> { + unsafe { + (self.0.catalog)(&self.0, name.into()) + .map(|catalog| { + Arc::new(ForeignCatalogProvider(catalog)) as Arc + }) + .into() + } + } +} + +#[cfg(test)] +mod tests { + use datafusion::catalog::{MemoryCatalogProvider, MemoryCatalogProviderList}; + + use super::*; + + #[test] + fn test_round_trip_ffi_catalog_provider_list() { + let prior_catalog = Arc::new(MemoryCatalogProvider::new()); + + let catalog_list = Arc::new(MemoryCatalogProviderList::new()); + assert!(catalog_list + .as_ref() + .register_catalog("prior_catalog".to_owned(), prior_catalog) + .is_none()); + + let ffi_catalog_list = FFI_CatalogProviderList::new(catalog_list, None); + + let foreign_catalog_list: ForeignCatalogProviderList = (&ffi_catalog_list).into(); + + let prior_catalog_names = foreign_catalog_list.catalog_names(); + assert_eq!(prior_catalog_names.len(), 1); + 
assert_eq!(prior_catalog_names[0], "prior_catalog"); + + // Replace an existing catalog with one of the same name + let returned_catalog = foreign_catalog_list.register_catalog( + "prior_catalog".to_owned(), + Arc::new(MemoryCatalogProvider::new()), + ); + assert!(returned_catalog.is_some()); + assert_eq!(foreign_catalog_list.catalog_names().len(), 1); + + // Add a new catalog + let returned_catalog = foreign_catalog_list.register_catalog( + "second_catalog".to_owned(), + Arc::new(MemoryCatalogProvider::new()), + ); + assert!(returned_catalog.is_none()); + assert_eq!(foreign_catalog_list.catalog_names().len(), 2); + + // Retrieve non-existent catalog + let returned_catalog = foreign_catalog_list.catalog("non_existent_catalog"); + assert!(returned_catalog.is_none()); + + // Retrieve valid catalog + let returned_catalog = foreign_catalog_list.catalog("second_catalog"); + assert!(returned_catalog.is_some()); + } +} diff --git a/datafusion/ffi/src/lib.rs b/datafusion/ffi/src/lib.rs index 0c2340e8ce7b..a8094057773b 100644 --- a/datafusion/ffi/src/lib.rs +++ b/datafusion/ffi/src/lib.rs @@ -26,6 +26,7 @@ pub mod arrow_wrappers; pub mod catalog_provider; +pub mod catalog_provider_list; pub mod execution_plan; pub mod insert_op; pub mod plan_properties; diff --git a/datafusion/ffi/src/tests/catalog.rs b/datafusion/ffi/src/tests/catalog.rs index f4293adb41b9..b6efbdf726e0 100644 --- a/datafusion/ffi/src/tests/catalog.rs +++ b/datafusion/ffi/src/tests/catalog.rs @@ -28,12 +28,13 @@ use std::{any::Any, fmt::Debug, sync::Arc}; use crate::catalog_provider::FFI_CatalogProvider; +use crate::catalog_provider_list::FFI_CatalogProviderList; use arrow::datatypes::Schema; use async_trait::async_trait; use datafusion::{ catalog::{ - CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, - TableProvider, + CatalogProvider, CatalogProviderList, MemoryCatalogProvider, + MemoryCatalogProviderList, MemorySchemaProvider, SchemaProvider, TableProvider, }, 
common::exec_err, datasource::MemTable, @@ -181,3 +182,55 @@ pub(crate) extern "C" fn create_catalog_provider() -> FFI_CatalogProvider { let catalog_provider = Arc::new(FixedCatalogProvider::default()); FFI_CatalogProvider::new(catalog_provider, None) } + +/// This catalog provider list is intended only for unit tests. It prepopulates with one +/// catalog and only allows for catalogs named after four colors. +#[derive(Debug)] +pub struct FixedCatalogProviderList { + inner: MemoryCatalogProviderList, +} + +impl Default for FixedCatalogProviderList { + fn default() -> Self { + let inner = MemoryCatalogProviderList::new(); + + let _ = inner.register_catalog( + "blue".to_owned(), + Arc::new(FixedCatalogProvider::default()), + ); + + Self { inner } + } +} + +impl CatalogProviderList for FixedCatalogProviderList { + fn as_any(&self) -> &dyn Any { + self + } + + fn catalog_names(&self) -> Vec { + self.inner.catalog_names() + } + + fn catalog(&self, name: &str) -> Option> { + self.inner.catalog(name) + } + + fn register_catalog( + &self, + name: String, + catalog: Arc, + ) -> Option> { + if !["blue", "red", "green", "yellow"].contains(&name.as_str()) { + log::warn!("FixedCatalogProviderList only provides four catalogs: blue, red, green, yellow"); + return None; + } + + self.inner.register_catalog(name, catalog) + } +} + +pub(crate) extern "C" fn create_catalog_provider_list() -> FFI_CatalogProviderList { + let catalog_provider_list = Arc::new(FixedCatalogProviderList::default()); + FFI_CatalogProviderList::new(catalog_provider_list, None) +} diff --git a/datafusion/ffi/src/tests/mod.rs b/datafusion/ffi/src/tests/mod.rs index 816086c32041..d9b4a61579e9 100644 --- a/datafusion/ffi/src/tests/mod.rs +++ b/datafusion/ffi/src/tests/mod.rs @@ -34,6 +34,8 @@ use crate::udaf::FFI_AggregateUDF; use crate::udwf::FFI_WindowUDF; use super::{table_provider::FFI_TableProvider, udf::FFI_ScalarUDF}; +use crate::catalog_provider_list::FFI_CatalogProviderList; +use 
crate::tests::catalog::create_catalog_provider_list; use arrow::array::RecordBatch; use async_provider::create_async_table_provider; use datafusion::{ @@ -62,6 +64,9 @@ pub struct ForeignLibraryModule { /// Construct an opinionated catalog provider pub create_catalog: extern "C" fn() -> FFI_CatalogProvider, + /// Construct an opinionated catalog provider list + pub create_catalog_list: extern "C" fn() -> FFI_CatalogProviderList, + /// Constructs the table provider pub create_table: extern "C" fn(synchronous: bool) -> FFI_TableProvider, @@ -123,6 +128,7 @@ extern "C" fn construct_table_provider(synchronous: bool) -> FFI_TableProvider { pub fn get_foreign_library_module() -> ForeignLibraryModuleRef { ForeignLibraryModule { create_catalog: create_catalog_provider, + create_catalog_list: create_catalog_provider_list, create_table: construct_table_provider, create_scalar_udf: create_ffi_abs_func, create_nullary_udf: create_ffi_random_func, diff --git a/datafusion/ffi/tests/ffi_catalog.rs b/datafusion/ffi/tests/ffi_catalog.rs new file mode 100644 index 000000000000..b63d8cbd631b --- /dev/null +++ b/datafusion/ffi/tests/ffi_catalog.rs @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +/// Add an additional module here for convenience to scope this to only +/// when the feature integration-tests is built +#[cfg(feature = "integration-tests")] +mod tests { + use datafusion::prelude::SessionContext; + use datafusion_common::DataFusionError; + use datafusion_ffi::catalog_provider::ForeignCatalogProvider; + use datafusion_ffi::catalog_provider_list::ForeignCatalogProviderList; + use datafusion_ffi::tests::utils::get_module; + use std::sync::Arc; + + #[tokio::test] + async fn test_catalog() -> datafusion_common::Result<()> { + let module = get_module()?; + + let ffi_catalog = + module + .create_catalog() + .ok_or(DataFusionError::NotImplemented( + "External catalog provider failed to implement create_catalog" + .to_string(), + ))?(); + let foreign_catalog: ForeignCatalogProvider = (&ffi_catalog).into(); + + let ctx = SessionContext::default(); + let _ = ctx.register_catalog("fruit", Arc::new(foreign_catalog)); + + let df = ctx.table("fruit.apple.purchases").await?; + + let results = df.collect().await?; + + assert_eq!(results.len(), 2); + let num_rows: usize = results.into_iter().map(|rb| rb.num_rows()).sum(); + assert_eq!(num_rows, 5); + + Ok(()) + } + + #[tokio::test] + async fn test_catalog_list() -> datafusion_common::Result<()> { + let module = get_module()?; + + let ffi_catalog_list = + module + .create_catalog_list() + .ok_or(DataFusionError::NotImplemented( + "External catalog provider failed to implement create_catalog_list" + .to_string(), + ))?(); + let foreign_catalog_list: ForeignCatalogProviderList = (&ffi_catalog_list).into(); + + let ctx = SessionContext::default(); + ctx.register_catalog_list(Arc::new(foreign_catalog_list)); + + let df = ctx.table("blue.apple.purchases").await?; + + let results = df.collect().await?; + + assert_eq!(results.len(), 2); + let num_rows: usize = results.into_iter().map(|rb| rb.num_rows()).sum(); + assert_eq!(num_rows, 5); + + Ok(()) + } +} diff --git a/datafusion/ffi/tests/ffi_integration.rs 
b/datafusion/ffi/tests/ffi_integration.rs index eb53e76bfb9b..7b4d1b1e350a 100644 --- a/datafusion/ffi/tests/ffi_integration.rs +++ b/datafusion/ffi/tests/ffi_integration.rs @@ -21,7 +21,6 @@ mod tests { use datafusion::error::{DataFusionError, Result}; use datafusion::prelude::SessionContext; - use datafusion_ffi::catalog_provider::ForeignCatalogProvider; use datafusion_ffi::table_provider::ForeignTableProvider; use datafusion_ffi::tests::create_record_batch; use datafusion_ffi::tests::utils::get_module; @@ -69,30 +68,4 @@ mod tests { async fn sync_test_table_provider() -> Result<()> { test_table_provider(true).await } - - #[tokio::test] - async fn test_catalog() -> Result<()> { - let module = get_module()?; - - let ffi_catalog = - module - .create_catalog() - .ok_or(DataFusionError::NotImplemented( - "External catalog provider failed to implement create_catalog" - .to_string(), - ))?(); - let foreign_catalog: ForeignCatalogProvider = (&ffi_catalog).into(); - - let ctx = SessionContext::default(); - let _ = ctx.register_catalog("fruit", Arc::new(foreign_catalog)); - - let df = ctx.table("fruit.apple.purchases").await?; - - let results = df.collect().await?; - - assert!(!results.is_empty()); - assert!(results[0].num_rows() != 0); - - Ok(()) - } } diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index aab1f445d559..b0f3483513ed 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under the License. 
+use arrow::array::{new_null_array, BooleanArray}; +use arrow::compute::kernels::zip::zip; +use arrow::compute::{and, is_not_null, is_null}; use arrow::datatypes::{DataType, Field, FieldRef}; -use datafusion_common::{exec_err, internal_err, plan_err, Result}; +use datafusion_common::{exec_err, internal_err, Result}; use datafusion_expr::binary::try_type_union_resolution; -use datafusion_expr::conditional_expressions::CaseBuilder; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ - ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, + ColumnarValue, Documentation, ReturnFieldArgs, ScalarFunctionArgs, }; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use datafusion_macros::user_doc; @@ -47,7 +48,7 @@ use std::any::Any; )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct CoalesceFunc { - pub(super) signature: Signature, + signature: Signature, } impl Default for CoalesceFunc { @@ -94,45 +95,61 @@ impl ScalarUDFImpl for CoalesceFunc { Ok(Field::new(self.name(), return_type, nullable).into()) } - fn simplify( - &self, - args: Vec, - _info: &dyn SimplifyInfo, - ) -> Result { + /// coalesce evaluates to the first value which is not NULL + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let args = args.args; + // do not accept 0 arguments. if args.is_empty() { - return plan_err!("coalesce must have at least one argument"); - } - if args.len() == 1 { - return Ok(ExprSimplifyResult::Simplified( - args.into_iter().next().unwrap(), - )); + return exec_err!( + "coalesce was called with {} arguments. 
It requires at least 1.", + args.len() + ); } - let n = args.len(); - let (init, last_elem) = args.split_at(n - 1); - let whens = init - .iter() - .map(|x| x.clone().is_not_null()) - .collect::>(); - let cases = init.to_vec(); - Ok(ExprSimplifyResult::Simplified( - CaseBuilder::new(None, whens, cases, Some(Box::new(last_elem[0].clone()))) - .end()?, - )) - } - - /// coalesce evaluates to the first value which is not NULL - fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { - internal_err!("coalesce should have been simplified to case") - } - - fn conditional_arguments<'a>( - &self, - args: &'a [Expr], - ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { - let eager = vec![&args[0]]; - let lazy = args[1..].iter().collect(); - Some((eager, lazy)) + let return_type = args[0].data_type(); + let mut return_array = args.iter().filter_map(|x| match x { + ColumnarValue::Array(array) => Some(array.len()), + _ => None, + }); + + if let Some(size) = return_array.next() { + // start with nulls as default output + let mut current_value = new_null_array(&return_type, size); + let mut remainder = BooleanArray::from(vec![true; size]); + + for arg in args { + match arg { + ColumnarValue::Array(ref array) => { + let to_apply = and(&remainder, &is_not_null(array.as_ref())?)?; + current_value = zip(&to_apply, array, ¤t_value)?; + remainder = and(&remainder, &is_null(array)?)?; + } + ColumnarValue::Scalar(value) => { + if value.is_null() { + continue; + } else { + let last_value = value.to_scalar()?; + current_value = zip(&remainder, &last_value, ¤t_value)?; + break; + } + } + } + if remainder.iter().all(|x| x == Some(false)) { + break; + } + } + Ok(ColumnarValue::Array(current_value)) + } else { + let result = args + .iter() + .filter_map(|x| match x { + ColumnarValue::Scalar(s) if !s.is_null() => Some(x.clone()), + _ => None, + }) + .next() + .unwrap_or_else(|| args[0].clone()); + Ok(result) + } } fn short_circuits(&self) -> bool { diff --git 
a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index 0b9968a88fc9..c8b34c4b1780 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -15,19 +15,21 @@ // specific language governing permissions and limitations // under the License. -use crate::core::coalesce::CoalesceFunc; -use arrow::datatypes::{DataType, FieldRef}; -use datafusion_common::Result; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use arrow::array::Array; +use arrow::compute::is_not_null; +use arrow::compute::kernels::zip::zip; +use arrow::datatypes::DataType; +use datafusion_common::{utils::take_function_args, Result}; use datafusion_expr::{ - ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, - ScalarUDFImpl, Signature, Volatility, + ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + Volatility, }; use datafusion_macros::user_doc; +use std::sync::Arc; #[user_doc( doc_section(label = "Conditional Functions"), - description = "Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_ and _expression2_ is not evaluated. 
This function can be used to substitute a default value for NULL values.", + description = "Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_.", syntax_example = "nvl(expression1, expression2)", sql_example = r#"```sql > select nvl(null, 'a'); @@ -55,7 +57,7 @@ use datafusion_macros::user_doc; )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct NVLFunc { - coalesce: CoalesceFunc, + signature: Signature, aliases: Vec, } @@ -88,13 +90,11 @@ impl Default for NVLFunc { impl NVLFunc { pub fn new() -> Self { Self { - coalesce: CoalesceFunc { - signature: Signature::uniform( - 2, - SUPPORTED_NVL_TYPES.to_vec(), - Volatility::Immutable, - ), - }, + signature: Signature::uniform( + 2, + SUPPORTED_NVL_TYPES.to_vec(), + Volatility::Immutable, + ), aliases: vec![String::from("ifnull")], } } @@ -110,45 +110,209 @@ impl ScalarUDFImpl for NVLFunc { } fn signature(&self) -> &Signature { - &self.coalesce.signature + &self.signature } fn return_type(&self, arg_types: &[DataType]) -> Result { - self.coalesce.return_type(arg_types) + Ok(arg_types[0].clone()) } - fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { - self.coalesce.return_field_from_args(args) + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + nvl_func(&args.args) } - fn simplify( - &self, - args: Vec, - info: &dyn SimplifyInfo, - ) -> Result { - self.coalesce.simplify(args, info) + fn aliases(&self) -> &[String] { + &self.aliases } - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - self.coalesce.invoke_with_args(args) + fn documentation(&self) -> Option<&Documentation> { + self.doc() } +} + +fn nvl_func(args: &[ColumnarValue]) -> Result { + let [lhs, rhs] = take_function_args("nvl/ifnull", args)?; + let (lhs_array, rhs_array) = match (lhs, rhs) { + (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => { + (Arc::clone(lhs), rhs.to_array_of_size(lhs.len())?) 
+ } + (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => { + (Arc::clone(lhs), Arc::clone(rhs)) + } + (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => { + (lhs.to_array_of_size(rhs.len())?, Arc::clone(rhs)) + } + (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => { + let mut current_value = lhs; + if lhs.is_null() { + current_value = rhs; + } + return Ok(ColumnarValue::Scalar(current_value.clone())); + } + }; + let to_apply = is_not_null(&lhs_array)?; + let value = zip(&to_apply, &lhs_array, &rhs_array)?; + Ok(ColumnarValue::Array(value)) +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow::array::*; + + use super::*; + use datafusion_common::ScalarValue; + + #[test] + fn nvl_int32() -> Result<()> { + let a = Int32Array::from(vec![ + Some(1), + Some(2), + None, + None, + Some(3), + None, + None, + Some(4), + Some(5), + ]); + let a = ColumnarValue::Array(Arc::new(a)); + + let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(6i32))); - fn conditional_arguments<'a>( - &self, - args: &'a [Expr], - ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { - self.coalesce.conditional_arguments(args) + let result = nvl_func(&[a, lit_array])?; + let result = result.into_array(0).expect("Failed to convert to array"); + + let expected = Arc::new(Int32Array::from(vec![ + Some(1), + Some(2), + Some(6), + Some(6), + Some(3), + Some(6), + Some(6), + Some(4), + Some(5), + ])) as ArrayRef; + assert_eq!(expected.as_ref(), result.as_ref()); + Ok(()) } - fn short_circuits(&self) -> bool { - self.coalesce.short_circuits() + #[test] + // Ensure that arrays with no nulls can also invoke nvl() correctly + fn nvl_int32_non_nulls() -> Result<()> { + let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]); + let a = ColumnarValue::Array(Arc::new(a)); + + let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(20i32))); + + let result = nvl_func(&[a, lit_array])?; + let result = result.into_array(0).expect("Failed to convert to array"); + + let 
expected = Arc::new(Int32Array::from(vec![ + Some(1), + Some(3), + Some(10), + Some(7), + Some(8), + Some(1), + Some(2), + Some(4), + Some(5), + ])) as ArrayRef; + assert_eq!(expected.as_ref(), result.as_ref()); + Ok(()) } - fn aliases(&self) -> &[String] { - &self.aliases + #[test] + fn nvl_boolean() -> Result<()> { + let a = BooleanArray::from(vec![Some(true), Some(false), None]); + let a = ColumnarValue::Array(Arc::new(a)); + + let lit_array = ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))); + + let result = nvl_func(&[a, lit_array])?; + let result = result.into_array(0).expect("Failed to convert to array"); + + let expected = Arc::new(BooleanArray::from(vec![ + Some(true), + Some(false), + Some(false), + ])) as ArrayRef; + + assert_eq!(expected.as_ref(), result.as_ref()); + Ok(()) } - fn documentation(&self) -> Option<&Documentation> { - self.doc() + #[test] + fn nvl_string() -> Result<()> { + let a = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); + let a = ColumnarValue::Array(Arc::new(a)); + + let lit_array = ColumnarValue::Scalar(ScalarValue::from("bax")); + + let result = nvl_func(&[a, lit_array])?; + let result = result.into_array(0).expect("Failed to convert to array"); + + let expected = Arc::new(StringArray::from(vec![ + Some("foo"), + Some("bar"), + Some("bax"), + Some("baz"), + ])) as ArrayRef; + + assert_eq!(expected.as_ref(), result.as_ref()); + Ok(()) + } + + #[test] + fn nvl_literal_first() -> Result<()> { + let a = Int32Array::from(vec![Some(1), Some(2), None, None, Some(3), Some(4)]); + let a = ColumnarValue::Array(Arc::new(a)); + + let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); + + let result = nvl_func(&[lit_array, a])?; + let result = result.into_array(0).expect("Failed to convert to array"); + + let expected = Arc::new(Int32Array::from(vec![ + Some(2), + Some(2), + Some(2), + Some(2), + Some(2), + Some(2), + ])) as ArrayRef; + assert_eq!(expected.as_ref(), result.as_ref()); + Ok(()) + } 
+ + #[test] + fn nvl_scalar() -> Result<()> { + let a_null = ColumnarValue::Scalar(ScalarValue::Int32(None)); + let b_null = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); + + let result_null = nvl_func(&[a_null, b_null])?; + let result_null = result_null + .into_array(1) + .expect("Failed to convert to array"); + + let expected_null = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef; + + assert_eq!(expected_null.as_ref(), result_null.as_ref()); + + let a_nnull = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); + let b_nnull = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32))); + + let result_nnull = nvl_func(&[a_nnull, b_nnull])?; + let result_nnull = result_nnull + .into_array(1) + .expect("Failed to convert to array"); + + let expected_nnull = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef; + assert_eq!(expected_nnull.as_ref(), result_nnull.as_ref()); + + Ok(()) } } diff --git a/datafusion/functions/src/core/nvl2.rs b/datafusion/functions/src/core/nvl2.rs index 45cb6760d062..82aa8d2a4cd5 100644 --- a/datafusion/functions/src/core/nvl2.rs +++ b/datafusion/functions/src/core/nvl2.rs @@ -15,16 +15,17 @@ // specific language governing permissions and limitations // under the License. 
-use arrow::datatypes::{DataType, Field, FieldRef}; +use arrow::array::Array; +use arrow::compute::is_not_null; +use arrow::compute::kernels::zip::zip; +use arrow::datatypes::DataType; use datafusion_common::{internal_err, utils::take_function_args, Result}; use datafusion_expr::{ - conditional_expressions::CaseBuilder, - simplify::{ExprSimplifyResult, SimplifyInfo}, - type_coercion::binary::comparison_coercion, - ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, - ScalarUDFImpl, Signature, Volatility, + type_coercion::binary::comparison_coercion, ColumnarValue, Documentation, + ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, }; use datafusion_macros::user_doc; +use std::sync::Arc; #[user_doc( doc_section(label = "Conditional Functions"), @@ -94,37 +95,8 @@ impl ScalarUDFImpl for NVL2Func { Ok(arg_types[1].clone()) } - fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { - let nullable = - args.arg_fields[1].is_nullable() || args.arg_fields[2].is_nullable(); - let return_type = args.arg_fields[1].data_type().clone(); - Ok(Field::new(self.name(), return_type, nullable).into()) - } - - fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { - internal_err!("nvl2 should have been simplified to case") - } - - fn simplify( - &self, - args: Vec, - _info: &dyn SimplifyInfo, - ) -> Result { - let [test, if_non_null, if_null] = take_function_args(self.name(), args)?; - - let expr = CaseBuilder::new( - None, - vec![test.is_not_null()], - vec![if_non_null], - Some(Box::new(if_null)), - ) - .end()?; - - Ok(ExprSimplifyResult::Simplified(expr)) - } - - fn short_circuits(&self) -> bool { - true + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + nvl2_func(&args.args) } fn coerce_types(&self, arg_types: &[DataType]) -> Result> { @@ -151,3 +123,42 @@ impl ScalarUDFImpl for NVL2Func { self.doc() } } + +fn nvl2_func(args: &[ColumnarValue]) -> Result { + let mut len = 1; + let mut is_array = false; + for arg 
in args { + if let ColumnarValue::Array(array) = arg { + len = array.len(); + is_array = true; + break; + } + } + if is_array { + let args = args + .iter() + .map(|arg| match arg { + ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(len), + ColumnarValue::Array(array) => Ok(Arc::clone(array)), + }) + .collect::>>()?; + let [tested, if_non_null, if_null] = take_function_args("nvl2", args)?; + let to_apply = is_not_null(&tested)?; + let value = zip(&to_apply, &if_non_null, &if_null)?; + Ok(ColumnarValue::Array(value)) + } else { + let [tested, if_non_null, if_null] = take_function_args("nvl2", args)?; + match &tested { + ColumnarValue::Array(_) => { + internal_err!("except Scalar value, but got Array") + } + ColumnarValue::Scalar(scalar) => { + if scalar.is_null() { + Ok(if_null.clone()) + } else { + Ok(if_non_null.clone()) + } + } + } + } +} diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 913e6217af82..c8376cf84415 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -276,6 +276,7 @@ impl ScalarUDFImpl for DateTruncFunc { T::UNIT, array, granularity, + tz_opt.clone(), )?; return Ok(ColumnarValue::Array(result)); } @@ -522,6 +523,7 @@ fn general_date_trunc_array_fine_granularity( tu: TimeUnit, array: &PrimitiveArray, granularity: DateTruncGranularity, + tz_opt: Option>, ) -> Result { let unit = match (tu, granularity) { (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60), @@ -556,7 +558,8 @@ fn general_date_trunc_array_fine_granularity( .iter() .map(|v| *v - i64::rem_euclid(*v, unit)), array.nulls().cloned(), - ); + ) + .with_timezone_opt(tz_opt); Ok(Arc::new(array)) } else { // truncate to the same or smaller unit @@ -1094,4 +1097,176 @@ mod tests { } }); } + + #[test] + fn test_date_trunc_fine_granularity_timezones() { + let cases = [ + // Test "second" granularity + ( + vec![ + "2020-09-08T13:42:29.190855Z", + 
"2020-09-08T13:42:30.500000Z", + "2020-09-08T13:42:31.999999Z", + ], + Some("+00".into()), + "second", + vec![ + "2020-09-08T13:42:29.000000Z", + "2020-09-08T13:42:30.000000Z", + "2020-09-08T13:42:31.000000Z", + ], + ), + ( + vec![ + "2020-09-08T13:42:29.190855+05", + "2020-09-08T13:42:30.500000+05", + "2020-09-08T13:42:31.999999+05", + ], + Some("+05".into()), + "second", + vec![ + "2020-09-08T13:42:29.000000+05", + "2020-09-08T13:42:30.000000+05", + "2020-09-08T13:42:31.000000+05", + ], + ), + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:42:30.500000Z", + "2020-09-08T13:42:31.999999Z", + ], + Some("Europe/Berlin".into()), + "second", + vec![ + "2020-09-08T13:42:29.000000Z", + "2020-09-08T13:42:30.000000Z", + "2020-09-08T13:42:31.000000Z", + ], + ), + // Test "minute" granularity + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:43:30.500000Z", + "2020-09-08T13:44:31.999999Z", + ], + Some("+00".into()), + "minute", + vec![ + "2020-09-08T13:42:00.000000Z", + "2020-09-08T13:43:00.000000Z", + "2020-09-08T13:44:00.000000Z", + ], + ), + ( + vec![ + "2020-09-08T13:42:29.190855+08", + "2020-09-08T13:43:30.500000+08", + "2020-09-08T13:44:31.999999+08", + ], + Some("+08".into()), + "minute", + vec![ + "2020-09-08T13:42:00.000000+08", + "2020-09-08T13:43:00.000000+08", + "2020-09-08T13:44:00.000000+08", + ], + ), + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:43:30.500000Z", + "2020-09-08T13:44:31.999999Z", + ], + Some("America/Sao_Paulo".into()), + "minute", + vec![ + "2020-09-08T13:42:00.000000Z", + "2020-09-08T13:43:00.000000Z", + "2020-09-08T13:44:00.000000Z", + ], + ), + // Test with None (no timezone) + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:43:30.500000Z", + "2020-09-08T13:44:31.999999Z", + ], + None, + "minute", + vec![ + "2020-09-08T13:42:00.000000Z", + "2020-09-08T13:43:00.000000Z", + "2020-09-08T13:44:00.000000Z", + ], + ), + // Test millisecond granularity + ( + vec![ + "2020-09-08T13:42:29.190855Z", 
+ "2020-09-08T13:42:29.191999Z", + "2020-09-08T13:42:29.192500Z", + ], + Some("Asia/Kolkata".into()), + "millisecond", + vec![ + "2020-09-08T19:12:29.190000+05:30", + "2020-09-08T19:12:29.191000+05:30", + "2020-09-08T19:12:29.192000+05:30", + ], + ), + ]; + + cases + .iter() + .for_each(|(original, tz_opt, granularity, expected)| { + let input = original + .iter() + .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) + .collect::() + .with_timezone_opt(tz_opt.clone()); + let right = expected + .iter() + .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) + .collect::() + .with_timezone_opt(tz_opt.clone()); + let batch_len = input.len(); + let arg_fields = vec![ + Field::new("a", DataType::Utf8, false).into(), + Field::new("b", input.data_type().clone(), false).into(), + ]; + let args = datafusion_expr::ScalarFunctionArgs { + args: vec![ + ColumnarValue::Scalar(ScalarValue::from(*granularity)), + ColumnarValue::Array(Arc::new(input)), + ], + arg_fields, + number_rows: batch_len, + return_field: Field::new( + "f", + DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()), + true, + ) + .into(), + config_options: Arc::new(ConfigOptions::default()), + }; + let result = DateTruncFunc::new().invoke_with_args(args).unwrap(); + if let ColumnarValue::Array(result) = result { + assert_eq!( + result.data_type(), + &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()), + "Failed for granularity: {granularity}, timezone: {tz_opt:?}" + ); + let left = as_primitive_array::(&result); + assert_eq!( + left, &right, + "Failed for granularity: {granularity}, timezone: {tz_opt:?}" + ); + } else { + panic!("unexpected column type"); + } + }); + } } diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 251006849459..ec1f8f991a8e 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -652,8 +652,10 @@ impl CSEController for 
ExprCSEController<'_> { // In case of `ScalarFunction`s we don't know which children are surely // executed so start visiting all children conditionally and stop the // recursion with `TreeNodeRecursion::Jump`. - Expr::ScalarFunction(ScalarFunction { func, args }) => { - func.conditional_arguments(args) + Expr::ScalarFunction(ScalarFunction { func, args }) + if func.short_circuits() => + { + Some((vec![], args.iter().collect())) } // In case of `And` and `Or` the first child is surely executed, but we diff --git a/datafusion/sqllogictest/test_files/nvl.slt b/datafusion/sqllogictest/test_files/nvl.slt index f4225148ab78..daab54307cc2 100644 --- a/datafusion/sqllogictest/test_files/nvl.slt +++ b/datafusion/sqllogictest/test_files/nvl.slt @@ -148,38 +148,3 @@ query T SELECT NVL(arrow_cast('a', 'Utf8View'), NULL); ---- a - -# nvl is implemented as a case, and short-circuits evaluation -# so the following query should not error -query I -SELECT NVL(1, 1/0); ----- -1 - -# but this one should -query error DataFusion error: Arrow error: Divide by zero error -SELECT NVL(NULL, 1/0); - -# Expect the query plan to show nvl as a case expression -query I -select NVL(int_field, 9999) FROM test; ----- -1 -2 -3 -9999 -4 -9999 - -# Expect the query plan to show nvl as a case expression -query TT -EXPLAIN select NVL(int_field, 9999) FROM test; ----- -logical_plan -01)Projection: CASE WHEN __common_expr_1 IS NOT NULL THEN __common_expr_1 ELSE Int64(9999) END AS nvl(test.int_field,Int64(9999)) -02)--Projection: CAST(test.int_field AS Int64) AS __common_expr_1 -03)----TableScan: test projection=[int_field] -physical_plan -01)ProjectionExec: expr=[CASE WHEN __common_expr_1@0 IS NOT NULL THEN __common_expr_1@0 ELSE 9999 END as nvl(test.int_field,Int64(9999))] -02)--ProjectionExec: expr=[CAST(int_field@0 AS Int64) as __common_expr_1] -03)----DataSourceExec: partitions=1, partition_sizes=[1] diff --git a/datafusion/sqllogictest/test_files/select.slt 
b/datafusion/sqllogictest/test_files/select.slt index 5c684eb83d1a..598a587bfe68 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1656,10 +1656,10 @@ query TT explain select coalesce(1, y/x), coalesce(2, y/x) from t; ---- logical_plan -01)Projection: Int64(1) AS coalesce(Int64(1),t.y / t.x), Int64(2) AS coalesce(Int64(2),t.y / t.x) -02)--TableScan: t projection=[] +01)Projection: coalesce(Int64(1), CAST(t.y / t.x AS Int64)), coalesce(Int64(2), CAST(t.y / t.x AS Int64)) +02)--TableScan: t projection=[x, y] physical_plan -01)ProjectionExec: expr=[1 as coalesce(Int64(1),t.y / t.x), 2 as coalesce(Int64(2),t.y / t.x)] +01)ProjectionExec: expr=[coalesce(1, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(1),t.y / t.x), coalesce(2, CAST(y@1 / x@0 AS Int64)) as coalesce(Int64(2),t.y / t.x)] 02)--DataSourceExec: partitions=1, partition_sizes=[1] query TT @@ -1686,17 +1686,11 @@ physical_plan 02)--ProjectionExec: expr=[y@1 = 0 as __common_expr_1, x@0 as x, y@1 as y] 03)----DataSourceExec: partitions=1, partition_sizes=[1] -query II -select coalesce(1, y/x), coalesce(2, y/x) from t; ----- -1 2 -1 2 -1 2 -1 2 -1 2 - # due to the reason describe in https://github.com/apache/datafusion/issues/8927, # the following queries will fail +query error +select coalesce(1, y/x), coalesce(2, y/x) from t; + query error SELECT y > 0 and 1 / y < 1, x > 0 and y > 0 and 1 / y < 1 / x from t; diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index 4d30f572ad6f..fb67daa0b840 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -988,7 +988,7 @@ query TT EXPLAIN SELECT NVL(column1_utf8view, 'a') as c2 FROM test; ---- logical_plan -01)Projection: CASE WHEN test.column1_utf8view IS NOT NULL THEN test.column1_utf8view ELSE Utf8View("a") END AS c2 +01)Projection: 
nvl(test.column1_utf8view, Utf8View("a")) AS c2 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for nullif diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index cdacad0fda0d..5c365b056d35 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -45,6 +45,9 @@ create table ts_data_millis as select arrow_cast(ts / 1000000, 'Timestamp(Millis statement ok create table ts_data_secs as select arrow_cast(ts / 1000000000, 'Timestamp(Second, None)') as ts, value from ts_data; +statement ok +create table ts_data_micros_kolkata as select arrow_cast(ts / 1000, 'Timestamp(Microsecond, Some("Asia/Kolkata"))') as ts, value from ts_data; + ########## ## Current date Tests @@ -1873,27 +1876,6 @@ true false true true -########## -## Common timestamp data -########## - -statement ok -drop table ts_data - -statement ok -drop table ts_data_nanos - -statement ok -drop table ts_data_micros - -statement ok -drop table ts_data_millis - -statement ok -drop table ts_data_secs - - - ########## ## Timezone impact on scalar functions # @@ -3703,3 +3685,34 @@ SELECT FROM (SELECT CAST('2005-09-10 13:31:00 +02:00' AS timestamp with time zone) AS a) ---- Timestamp(ns, "+00") 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z + +query P +SELECT + date_trunc('millisecond', ts) +FROM ts_data_micros_kolkata +---- +2020-09-08T19:12:29.190+05:30 +2020-09-08T18:12:29.190+05:30 +2020-09-08T17:12:29.190+05:30 + +########## +## Common timestamp data +########## + +statement ok +drop table ts_data + +statement ok +drop table ts_data_nanos + +statement ok +drop table ts_data_micros + +statement ok +drop table ts_data_millis + +statement ok +drop table ts_data_secs + +statement ok +drop table ts_data_micros_kolkata diff --git a/dev/changelog/51.0.0.md b/dev/changelog/51.0.0.md new file mode 100644 index 
000000000000..60dd24cde559 --- /dev/null +++ b/dev/changelog/51.0.0.md @@ -0,0 +1,717 @@ + + +# Apache DataFusion 51.0.0 Changelog + +This release consists of 537 commits from 129 contributors. See credits at the end of this changelog for more information. + +See the [upgrade guide](https://datafusion.apache.org/library-user-guide/upgrading.html) for information on how to upgrade from previous versions. + +**Breaking changes:** + +- Introduce `TypeSignatureClass::Binary` to allow accepting arbitrarily sized `FixedSizeBinary` arguments [#17531](https://github.com/apache/datafusion/pull/17531) (Jefffrey) +- feat: change `datafusion-proto` to use `TaskContext` rather than`SessionContext` for physical plan serialization [#17601](https://github.com/apache/datafusion/pull/17601) (milenkovicm) +- chore: refactor usage of `reassign_predicate_columns` [#17703](https://github.com/apache/datafusion/pull/17703) (rkrishn7) +- fix: correct edge case where null haystack returns false instead of null [#17818](https://github.com/apache/datafusion/pull/17818) (Jefffrey) +- clean up duplicate information in FileOpener trait [#17956](https://github.com/apache/datafusion/pull/17956) (adriangb) +- refactor : deprecate `ParquetSource::predicate()` and merge into `FileSource::filter()` [#17971](https://github.com/apache/datafusion/pull/17971) (getChan) +- feat: convert_array_to_scalar_vec respects null elements [#17891](https://github.com/apache/datafusion/pull/17891) (vegarsti) +- make Union::try_new pub [#18125](https://github.com/apache/datafusion/pull/18125) (leoyvens) +- refactor: remove unused `type_coercion/aggregate.rs` functions [#18091](https://github.com/apache/datafusion/pull/18091) (Jefffrey) +- refactor: remove core crate from datafusion-proto [#18123](https://github.com/apache/datafusion/pull/18123) (timsaucer) +- Use TableSchema in FileScanConfig [#18231](https://github.com/apache/datafusion/pull/18231) (adriangb) +- Enable placeholders with extension types 
[#17986](https://github.com/apache/datafusion/pull/17986) (paleolimbot) +- Implement `DESCRIBE SELECT` to show schema rather than `EXPLAIN` plan [#18238](https://github.com/apache/datafusion/pull/18238) (djanderson) +- Push partition_statistics into DataSource [#18233](https://github.com/apache/datafusion/pull/18233) (adriangb) +- Let `FileScanConfig` own a list of `ProjectionExpr`s [#18253](https://github.com/apache/datafusion/pull/18253) (friendlymatthew) +- Introduce `expr_fields` to `AccumulatorArgs` to hold input argument fields [#18100](https://github.com/apache/datafusion/pull/18100) (Jefffrey) +- Rename `is_ordered_set_aggregate` to `supports_within_group_clause` for UDAFs [#18397](https://github.com/apache/datafusion/pull/18397) (Jefffrey) +- Move generate_series projection logic into LazyMemoryStream [#18373](https://github.com/apache/datafusion/pull/18373) (mkleen) + +**Performance related:** + +- Improve `Hash` and `Ord` speed for `dyn LogicalType` [#17437](https://github.com/apache/datafusion/pull/17437) (findepi) +- Faster `&&String::to_string` [#17583](https://github.com/apache/datafusion/pull/17583) (findepi) +- perf: Simplify CASE for any WHEN TRUE [#17602](https://github.com/apache/datafusion/pull/17602) (petern48) +- perf: Improve the performance of WINDOW functions with many partitions [#17528](https://github.com/apache/datafusion/pull/17528) (nuno-faria) +- Avoid redundant Schema clones [#17643](https://github.com/apache/datafusion/pull/17643) (findepi) +- Prevent exponential planning time for Window functions - v2 [#17684](https://github.com/apache/datafusion/pull/17684) (berkaysynnada) +- Add case expr simplifiers for literal comparisons [#17743](https://github.com/apache/datafusion/pull/17743) (jackkleeman) +- Enable Projection Pushdown Optimization for Recursive CTEs [#16696](https://github.com/apache/datafusion/pull/16696) (kosiew) +- perf: Optimize CASE for any WHEN false [#17835](https://github.com/apache/datafusion/pull/17835) 
(petern48) +- feat: Simplify `NOT(IN ..)` to `NOT IN` and `NOT (EXISTS ..)` to `NOT EXISTS` [#17848](https://github.com/apache/datafusion/pull/17848) (Tpt) +- perf: Faster `string_agg()` aggregate function (1000x speed for no DISTINCT and ORDER case) [#17837](https://github.com/apache/datafusion/pull/17837) (2010YOUY01) +- optimizer: allow projection pushdown through aliased recursive CTE references [#17875](https://github.com/apache/datafusion/pull/17875) (kosiew) +- perf: Implement boolean group values [#17726](https://github.com/apache/datafusion/pull/17726) (ashdnazg) +- #17838 Rewrite `regexp_like` calls as `~` and `*~` operator expressions when possible [#17839](https://github.com/apache/datafusion/pull/17839) (pepijnve) +- perf: add to `aggregate_vectorized` bench benchmark for `PrimitiveGroupValueBuilder` as well [#17930](https://github.com/apache/datafusion/pull/17930) (rluvaton) +- #17972 Restore case expr/expr optimisation while ensuring lazy evaluation [#17973](https://github.com/apache/datafusion/pull/17973) (pepijnve) +- chore: use `NullBuffer::union` for Spark `concat` [#18087](https://github.com/apache/datafusion/pull/18087) (comphead) +- Short circuit complex case evaluation modes as soon as possible [#17898](https://github.com/apache/datafusion/pull/17898) (pepijnve) +- perf: Fix NLJ slow join with condition `array_has` [#18161](https://github.com/apache/datafusion/pull/18161) (2010YOUY01) +- perf: improve `ScalarValue::to_array_of_size` for Boolean and some null values [#18180](https://github.com/apache/datafusion/pull/18180) (rluvaton) +- Allow filter pushdown through AggregateExec [#18404](https://github.com/apache/datafusion/pull/18404) (LiaCastaneda) +- Avoid scatter operation in `ExpressionOrExpression` case evaluation method [#18444](https://github.com/apache/datafusion/pull/18444) (pepijnve) + +**Implemented enhancements:** + +- feat: Implement `DFSchema.print_schema_tree()` method [#17459](https://github.com/apache/datafusion/pull/17459) 
(comphead) +- feat(spark): implement Spark `length` function [#17475](https://github.com/apache/datafusion/pull/17475) (wForget) +- feat: Add binary to `join_fuzz` testing [#17497](https://github.com/apache/datafusion/pull/17497) (jonathanc-n) +- feat: Support log for Decimal128 and Decimal256 [#17023](https://github.com/apache/datafusion/pull/17023) (theirix) +- feat(spark): implement Spark bitwise function shiftleft/shiftright/shiftrightunsighed [#17013](https://github.com/apache/datafusion/pull/17013) (chenkovsky) +- feat: Ensure explain format in config is valid [#17549](https://github.com/apache/datafusion/pull/17549) (Weijun-H) +- feat: Simplify CASE WHEN true THEN expr to expr [#17450](https://github.com/apache/datafusion/pull/17450) (EeshanBembi) +- feat: add `sql` feature to make sql planning optional [#17332](https://github.com/apache/datafusion/pull/17332) (timsaucer) +- feat: Add `OR REPLACE` to creating external tables [#17580](https://github.com/apache/datafusion/pull/17580) (jonathanc-n) +- feat(substrait): add support for RightAnti and RightSemi join types [#17604](https://github.com/apache/datafusion/pull/17604) (bvolpato) +- feat(small): Display `NullEquality` in join executor's `EXPLAIN` output [#17664](https://github.com/apache/datafusion/pull/17664) (2010YOUY01) +- feat(substrait): add time literal support [#17655](https://github.com/apache/datafusion/pull/17655) (bvolpato) +- feat(spark): implement Spark `make_interval` function [#17424](https://github.com/apache/datafusion/pull/17424) (davidlghellin) +- feat: expose `udafs` and `udwfs` methods on `FunctionRegistry` [#17650](https://github.com/apache/datafusion/pull/17650) (milenkovicm) +- feat: Support Seconds and Milliseconds literals in substrait [#17707](https://github.com/apache/datafusion/pull/17707) (petern48) +- feat: support for null, date, and timestamp types in approx_distinct [#17618](https://github.com/apache/datafusion/pull/17618) (killme2008) +- feat: support `Utf8View` for more 
args of `regexp_replace` [#17195](https://github.com/apache/datafusion/pull/17195) (mbutrovich) +- feat(spark): implement Spark `map` function `map_from_arrays` [#17456](https://github.com/apache/datafusion/pull/17456) (SparkApplicationMaster) +- feat: Display window function's alias name in output column [#17788](https://github.com/apache/datafusion/pull/17788) (devampatel03) +- feat(spark): implement Spark `make_dt_interval` function [#17728](https://github.com/apache/datafusion/pull/17728) (davidlghellin) +- feat: support multi-threaded writing of Parquet files with modular encryption [#16738](https://github.com/apache/datafusion/pull/16738) (rok) +- feat(spark): implement Spark `map` function `map_from_entries` [#17779](https://github.com/apache/datafusion/pull/17779) (SparkApplicationMaster) +- feat: Add Hash Join benchmarks [#17636](https://github.com/apache/datafusion/pull/17636) (jonathanc-n) +- feat: Support swap for `RightMark` Join [#17651](https://github.com/apache/datafusion/pull/17651) (jonathanc-n) +- feat: support spark udf format_string [#17561](https://github.com/apache/datafusion/pull/17561) (chenkovsky) +- feat(spark): implement Spark `try_parse_url` function [#17485](https://github.com/apache/datafusion/pull/17485) (rafafrdz) +- feat: Support reading CSV files with inconsistent column counts [#17553](https://github.com/apache/datafusion/pull/17553) (EeshanBembi) +- feat: Adds Instrumented Object Store Registry to datafusion-cli [#17953](https://github.com/apache/datafusion/pull/17953) (BlakeOrth) +- feat: add cargo-machete in CI [#18030](https://github.com/apache/datafusion/pull/18030) (Weijun-H) +- feat(spark): implement Spark `elt` function [#17729](https://github.com/apache/datafusion/pull/17729) (davidlghellin) +- feat: support Spark `concat` string function [#18063](https://github.com/apache/datafusion/pull/18063) (comphead) +- feat: support `null_treatment`, `distinct`, and `filter` for window functions in proto 
[#18024](https://github.com/apache/datafusion/pull/18024) (dqkqd) +- feat: Add percentile_cont aggregate function [#17988](https://github.com/apache/datafusion/pull/17988) (adriangb) +- feat: spark udf array shuffle [#17674](https://github.com/apache/datafusion/pull/17674) (chenkovsky) +- feat: Support configurable `EXPLAIN ANALYZE` detail level [#18098](https://github.com/apache/datafusion/pull/18098) (2010YOUY01) +- feat: add fp16 support to Substrait [#18086](https://github.com/apache/datafusion/pull/18086) (westonpace) +- feat: `ClassicJoin` for PWMJ [#17482](https://github.com/apache/datafusion/pull/17482) (jonathanc-n) +- feat(docs): display compatible logo for dark mode [#18197](https://github.com/apache/datafusion/pull/18197) (foskey51) +- feat: Add `deregister_object_store` [#17999](https://github.com/apache/datafusion/pull/17999) (jonathanc-n) +- feat: Add existence join to NestedLoopJoin benchmarks [#18005](https://github.com/apache/datafusion/pull/18005) (jonathanc-n) +- feat(small): Set 'summary' level metrics for `DataSourceExec` with parquet source [#18196](https://github.com/apache/datafusion/pull/18196) (2010YOUY01) +- feat: be indifferent to padding when decoding base64 [#18264](https://github.com/apache/datafusion/pull/18264) (colinmarc) +- feat: Add `output_bytes` to baseline metrics [#18268](https://github.com/apache/datafusion/pull/18268) (2010YOUY01) +- feat: Introduce `PruningMetrics` and use it in parquet file pruning metric [#18297](https://github.com/apache/datafusion/pull/18297) (2010YOUY01) +- feat: Improve metrics for aggregate streams. 
[#18325](https://github.com/apache/datafusion/pull/18325) (EmilyMatt) +- feat: allow pushdown of dynamic filters having partition cols [#18172](https://github.com/apache/datafusion/pull/18172) (feniljain) +- feat: support temporary views in DataFrameTableProvider [#18158](https://github.com/apache/datafusion/pull/18158) (r1b) +- feat: Better parquet row-group/page pruning metrics display [#18321](https://github.com/apache/datafusion/pull/18321) (2010YOUY01) +- feat: Add Hash trait to StatsType enum [#18382](https://github.com/apache/datafusion/pull/18382) (rluvaton) +- feat: support get_field for map literal [#18371](https://github.com/apache/datafusion/pull/18371) (chenkovsky) +- feat(docs): enable navbar [#18324](https://github.com/apache/datafusion/pull/18324) (foskey51) +- feat: Add `selectivity` metrics to `FilterExec` [#18406](https://github.com/apache/datafusion/pull/18406) (2010YOUY01) +- feat: Add `reduction_factor` metric to `AggregateExec` for EXPLAIN ANALYZE [#18455](https://github.com/apache/datafusion/pull/18455) (petern48) +- feat: support named arguments for aggregate and window udfs [#18389](https://github.com/apache/datafusion/pull/18389) (bubulalabu) +- feat: Add selectivity metric to NestedLoopJoinExec for EXPLAIN ANALYZE [#18481](https://github.com/apache/datafusion/pull/18481) (petern48) + +**Fixed bugs:** + +- fix: lazy evaluation for coalesce [#17357](https://github.com/apache/datafusion/pull/17357) (chenkovsky) +- fix: Implement AggregateUDFImpl::reverse_expr for StringAgg [#17165](https://github.com/apache/datafusion/pull/17165) (nuno-faria) +- fix: Support aggregate expressions in `QUALIFY` [#17313](https://github.com/apache/datafusion/pull/17313) (rkrishn7) +- fix: synchronize partition bounds reporting in HashJoin [#17452](https://github.com/apache/datafusion/pull/17452) (rkrishn7) +- fix: correct typos in `CONTRIBUTING.md` [#17507](https://github.com/apache/datafusion/pull/17507) (Weijun-H) +- fix: Add AWS environment variable checks 
for S3 tests [#17519](https://github.com/apache/datafusion/pull/17519) (Weijun-H) +- fix: Ensure the CachedParquetFileReader respects the metadata prefetch hint [#17302](https://github.com/apache/datafusion/pull/17302) (nuno-faria) +- fix: prevent UnionExec panic with empty inputs [#17449](https://github.com/apache/datafusion/pull/17449) (EeshanBembi) +- fix: ignore non-existent columns when adding filter equivalence info in `FileScanConfig` [#17546](https://github.com/apache/datafusion/pull/17546) (rkrishn7) +- fix: Prevent duplicate expressions in DynamicPhysicalExpr [#17551](https://github.com/apache/datafusion/pull/17551) (UBarney) +- fix: `SortExec` `TopK` OOM [#17622](https://github.com/apache/datafusion/pull/17622) (nuno-faria) +- fix: Change `OuterReferenceColumn` to contain the entire outer field to prevent metadata loss [#17524](https://github.com/apache/datafusion/pull/17524) (Kontinuation) +- fix: Preserves field metadata when creating logical plan for VALUES expression [#17525](https://github.com/apache/datafusion/pull/17525) (Kontinuation) +- fix: Ignore governance doc from typos [#17678](https://github.com/apache/datafusion/pull/17678) (rkrishn7) +- fix: null padding for `array_reverse` on `FixedSizeList` [#17673](https://github.com/apache/datafusion/pull/17673) (chenkovsky) +- fix: correct statistics for `NestedLoopJoinExec` [#17680](https://github.com/apache/datafusion/pull/17680) (duongcongtoai) +- fix: Partial AggregateMode will generate duplicate field names which will fail DFSchema construct [#17706](https://github.com/apache/datafusion/pull/17706) (zhuqi-lucas) +- fix: Remove parquet encryption feature from root deps [#17700](https://github.com/apache/datafusion/pull/17700) (Vyquos) +- fix: Remove datafusion-macros's dependency on datafusion-expr [#17688](https://github.com/apache/datafusion/pull/17688) (yutannihilation) +- fix: Filter out nulls properly in approx_percentile_cont_with_weight 
[#17780](https://github.com/apache/datafusion/pull/17780) (Jefffrey) +- fix: ignore `DataType::Null` in possible types during csv type inference [#17796](https://github.com/apache/datafusion/pull/17796) (dqkqd) +- fix: `ParquetSource` - `with_predicate()` don't have to reset metrics [#17858](https://github.com/apache/datafusion/pull/17858) (2010YOUY01) +- fix: optimizer `common_sub_expression_eliminate` fails in a window function [#17852](https://github.com/apache/datafusion/pull/17852) (dqkqd) +- fix: fix failing test compilation on main [#17955](https://github.com/apache/datafusion/pull/17955) (Jefffrey) +- fix: update `PrimitiveGroupValueBuilder` to match NaN correctly in scalar `equal_to` [#17979](https://github.com/apache/datafusion/pull/17979) (rluvaton) +- fix: Add overflow checks to SparkDateAdd/Sub to avoid panics [#18013](https://github.com/apache/datafusion/pull/18013) (andygrove) +- fix: Ensure ListingTable partitions are pruned when filters are not used [#17958](https://github.com/apache/datafusion/pull/17958) (peasee) +- fix: Improve null handling in array_to_string function [#18076](https://github.com/apache/datafusion/pull/18076) (Weijun-H) +- fix: Re-bump latest datafusion-testing module so extended tests succeed [#18110](https://github.com/apache/datafusion/pull/18110) (Jefffrey) +- fix: window unparsing [#17367](https://github.com/apache/datafusion/pull/17367) (chenkovsky) +- fix: Add dictionary coercion support for numeric comparison operations [#18099](https://github.com/apache/datafusion/pull/18099) (ahmed-mez) +- fix(substrait): schema errors for Aggregates with no groupings [#17909](https://github.com/apache/datafusion/pull/17909) (vbarua) +- fix: `array_distinct` inner nullability causing type mismatch [#18104](https://github.com/apache/datafusion/pull/18104) (dqkqd) +- fix: improve document ui [#18157](https://github.com/apache/datafusion/pull/18157) (getChan) +- fix(docs): resolve extra outline on tables 
[#18193](https://github.com/apache/datafusion/pull/18193) (foskey51) +- fix: Use dynamic timezone in now() function for accurate timestamp [#18017](https://github.com/apache/datafusion/pull/18017) (Weijun-H) +- fix: UnnestExec preserves relevant equivalence properties of input [#16985](https://github.com/apache/datafusion/pull/16985) (vegarsti) +- fix: wrong simplification for >= >, <= < [#18222](https://github.com/apache/datafusion/pull/18222) (chenkovsky) +- fix: only fall back to listing prefixes on 404 errors [#18263](https://github.com/apache/datafusion/pull/18263) (colinmarc) +- fix: Support Dictionary[Int32, Binary] for bitmap count spark function [#18273](https://github.com/apache/datafusion/pull/18273) (kazantsev-maksim) +- fix: support float16 for `abs()` [#18304](https://github.com/apache/datafusion/pull/18304) (Jefffrey) +- fix: Add WITH ORDER display in information_schema.views [#18282](https://github.com/apache/datafusion/pull/18282) (gene-bordegaray) +- fix: correct date_trunc for times before the epoch [#18356](https://github.com/apache/datafusion/pull/18356) (mhilton) +- fix: Preserve percent-encoding in `PartitionedFile` paths during deserialization [#18346](https://github.com/apache/datafusion/pull/18346) (lonless9) +- fix: SortPreservingMerge sanity check rejects valid ORDER BY with CASE expression [#18342](https://github.com/apache/datafusion/pull/18342) (watford-ep) +- fix: `DataFrame::select_columns` and `DataFrame::drop_columns` for qualified duplicated field names [#18236](https://github.com/apache/datafusion/pull/18236) (dqkqd) +- fix(docs): remove navbar padding breaking ui on mobile [#18402](https://github.com/apache/datafusion/pull/18402) (foskey51) +- fix: null cast not valid in substrait round trip [#18414](https://github.com/apache/datafusion/pull/18414) (gene-bordegaray) +- fix: map benchmark failing [#18469](https://github.com/apache/datafusion/pull/18469) (randyli) +- fix: eliminate warning when building without sql feature 
[#18480](https://github.com/apache/datafusion/pull/18480) (corasaurus-hex) +- fix: spark array return type mismatch when inner data type is LargeList [#18485](https://github.com/apache/datafusion/pull/18485) (jizezhang) +- fix: shuffle seed [#18518](https://github.com/apache/datafusion/pull/18518) (chenkovsky) + +**Documentation updates:** + +- Auto detect hive column partitioning with ListingTableFactory / `CREATE EXTERNAL TABLE` [#17232](https://github.com/apache/datafusion/pull/17232) (BlakeOrth) +- Rename Blaze to Auron [#17532](https://github.com/apache/datafusion/pull/17532) (merrily01) +- Revert #17295 (Support from-first SQL syntax) [#17520](https://github.com/apache/datafusion/pull/17520) (adriangb) +- minor: Update doc comments on type signature [#17556](https://github.com/apache/datafusion/pull/17556) (Jefffrey) +- docs: Update documentation on Epics and Supervising Maintainers [#17505](https://github.com/apache/datafusion/pull/17505) (alamb) +- docs: Move Google Summer of Code 2025 pages to a section [#17504](https://github.com/apache/datafusion/pull/17504) (alamb) +- Upgrade to arrow 56.1.0 [#17275](https://github.com/apache/datafusion/pull/17275) (alamb) +- docs: add xorq to list of known users [#17668](https://github.com/apache/datafusion/pull/17668) (dlovell) +- docs: deduplicate links in `introduction.md` [#17669](https://github.com/apache/datafusion/pull/17669) (Jefffrey) +- Add explicit PMC/committers list to governance docs page [#17574](https://github.com/apache/datafusion/pull/17574) (alamb) +- chore: Update READMEs of crates to be more consistent [#17691](https://github.com/apache/datafusion/pull/17691) (Jefffrey) +- chore: fix wasm-pack installation link in wasmtest README [#17704](https://github.com/apache/datafusion/pull/17704) (Jefffrey) +- docs: Remove disclaimer that `datafusion` 50.0.0 is not released [#17695](https://github.com/apache/datafusion/pull/17695) (nuno-faria) +- Bump MSRV to 1.87.0 
[#17724](https://github.com/apache/datafusion/pull/17724) (findepi) +- docs: Fix 'Clicking a link in optimizer docs downloads the file instead of redirecting to github' [#17723](https://github.com/apache/datafusion/pull/17723) (petern48) +- Move misplaced upgrading entry about MSRV [#17727](https://github.com/apache/datafusion/pull/17727) (findepi) +- Introduce `avg_distinct()` and `sum_distinct()` functions to DataFrame API [#17536](https://github.com/apache/datafusion/pull/17536) (Jefffrey) +- Support `WHERE`, `ORDER BY`, `LIMIT`, `SELECT`, `EXTEND` pipe operators [#17278](https://github.com/apache/datafusion/pull/17278) (simonvandel) +- doc: add missing examples for multiple math functions [#17018](https://github.com/apache/datafusion/pull/17018) (Adez017) +- chore: remove homebrew publish instructions from release steps [#17735](https://github.com/apache/datafusion/pull/17735) (Jefffrey) +- Improve documentation for ordered set aggregate functions [#17744](https://github.com/apache/datafusion/pull/17744) (alamb) +- docs: fix sidebar overlapping table on configuration page on website [#17738](https://github.com/apache/datafusion/pull/17738) (saimahendra282) +- docs: add Ballista link to landing page (#17746) [#17775](https://github.com/apache/datafusion/pull/17775) (Nihallllll) +- [DOCS] Add dbt Fusion engine and R2 Query Engine to "Known Users" [#17793](https://github.com/apache/datafusion/pull/17793) (dataders) +- docs: update wasmtest README with instructions for Apple silicon [#17755](https://github.com/apache/datafusion/pull/17755) (Jefffrey) +- docs: Add SedonaDB as known user of Apache DataFusion [#17806](https://github.com/apache/datafusion/pull/17806) (petern48) +- minor: simplify docs build process & pin pip package versions [#17816](https://github.com/apache/datafusion/pull/17816) (Jefffrey) +- Cleanup user guide known users section [#17834](https://github.com/apache/datafusion/pull/17834) (blaginin) +- Fix the doc about row_groups pruning metrics in 
explain_usage.md [#17846](https://github.com/apache/datafusion/pull/17846) (xudong963) +- Fix docs.rs build: Replace `auto_doc_cfg` with `doc_cfg` [#17845](https://github.com/apache/datafusion/pull/17845) (mbrobbel) +- docs: Add rerun.io to known users guide [#17825](https://github.com/apache/datafusion/pull/17825) (alamb) +- chore: fix typos & pin action hashes [#17855](https://github.com/apache/datafusion/pull/17855) (Jefffrey) +- Clarify email reply instructions for invitations [#17851](https://github.com/apache/datafusion/pull/17851) (rluvaton) +- Add missing parenthesis in features documentation [#17869](https://github.com/apache/datafusion/pull/17869) (Viicos) +- Improve comments for DataSinkExec [#17873](https://github.com/apache/datafusion/pull/17873) (xudong963) +- minor: Make `FunctionRegistry` `udafs` and `udwfs` methods mandatory [#17847](https://github.com/apache/datafusion/pull/17847) (milenkovicm) +- docs: Improve documentation for FunctionFactory / CREATE FUNCTION [#17859](https://github.com/apache/datafusion/pull/17859) (alamb) +- Support `AS`, `UNION`, `INTERSECTION`, `EXCEPT`, `AGGREGATE` pipe operators [#17312](https://github.com/apache/datafusion/pull/17312) (simonvandel) +- [forward port] Change version to 50.1.0 and add changelog (#17748) [#17826](https://github.com/apache/datafusion/pull/17826) (alamb) +- chore(deps): bump maturin from 1.9.4 to 1.9.5 in /docs [#17940](https://github.com/apache/datafusion/pull/17940) (dependabot[bot]) +- docs: `Window::try_new_with_schema` with a descriptive error message [#17926](https://github.com/apache/datafusion/pull/17926) (dqkqd) +- Support `JOIN` pipe operator [#17969](https://github.com/apache/datafusion/pull/17969) (simonvandel) +- Adds Object Store Profiling options/commands to CLI [#18004](https://github.com/apache/datafusion/pull/18004) (BlakeOrth) +- docs: typo in `working-with-exprs.md` [#18033](https://github.com/apache/datafusion/pull/18033) (Weijun-H) +- chore(deps): bump maturin from 1.9.5 
to 1.9.6 in /docs [#18039](https://github.com/apache/datafusion/pull/18039) (dependabot[bot]) +- [forward port] Change version to 50.2.0 and add changelog [#18057](https://github.com/apache/datafusion/pull/18057) (xudong963) +- Update committers on governance page [#18015](https://github.com/apache/datafusion/pull/18015) (alamb) +- Feat: Make current_date aware of execution timezone. [#18034](https://github.com/apache/datafusion/pull/18034) (codetyri0n) +- Add independent configs for topk/join dynamic filter [#18090](https://github.com/apache/datafusion/pull/18090) (xudong963) +- Adds Trace and Summary to CLI instrumented stores [#18064](https://github.com/apache/datafusion/pull/18064) (BlakeOrth) +- refactor: add dialect enum [#18043](https://github.com/apache/datafusion/pull/18043) (dariocurr) +- #17982 Make `nvl` a thin wrapper for `coalesce` [#17991](https://github.com/apache/datafusion/pull/17991) (pepijnve) +- minor: fix incorrect deprecation version & window docs [#18093](https://github.com/apache/datafusion/pull/18093) (Jefffrey) +- Adding hiop as known user [#18114](https://github.com/apache/datafusion/pull/18114) (enryls) +- Improve datafusion-cli object store profiling summary display [#18085](https://github.com/apache/datafusion/pull/18085) (alamb) +- Feat: Make current_time aware of execution timezone. [#18040](https://github.com/apache/datafusion/pull/18040) (codetyri0n) +- Docs: Update SQL example for current_time() and current_date(). 
[#18200](https://github.com/apache/datafusion/pull/18200) (codetyri0n) +- doc: Add `Metrics` section to the user-guide [#18216](https://github.com/apache/datafusion/pull/18216) (2010YOUY01) +- docs: Update HOWTOs for adding new functions [#18089](https://github.com/apache/datafusion/pull/18089) (Jefffrey) +- docs: fix trim for `rust,ignore` blocks [#18239](https://github.com/apache/datafusion/pull/18239) (Jefffrey) +- docs: refine `AggregateUDFImpl::is_ordered_set_aggregate` documentation [#17805](https://github.com/apache/datafusion/pull/17805) (Jefffrey) +- docs: fix broken SQL & DataFrame links in root README (#18153) [#18274](https://github.com/apache/datafusion/pull/18274) (manasa-manoj-nbr) +- doc: Contributor guide for AI-generated PRs [#18237](https://github.com/apache/datafusion/pull/18237) (2010YOUY01) +- doc: Add Join Physical Plan documentation, and configuration flag to benchmarks [#18209](https://github.com/apache/datafusion/pull/18209) (jonathanc-n) +- "Gentle Introduction to Arrow / Record Batches" #11336 [#18051](https://github.com/apache/datafusion/pull/18051) (sm4rtm4art) +- Upgrade DataFusion to arrow/parquet 57.0.0 [#17888](https://github.com/apache/datafusion/pull/17888) (alamb) +- Deduplicate range/gen_series nested functions code [#18198](https://github.com/apache/datafusion/pull/18198) (Jefffrey) +- minor: doc fixes for timestamp output format [#18315](https://github.com/apache/datafusion/pull/18315) (Jefffrey) +- Add PostgreSQL-style named arguments support for scalar functions [#18019](https://github.com/apache/datafusion/pull/18019) (bubulalabu) +- Change default prefetch_hint to 512Kb to reduce number of object store requests when reading parquet files [#18160](https://github.com/apache/datafusion/pull/18160) (zhuqi-lucas) +- Bump MSRV to 1.88.0 [#18403](https://github.com/apache/datafusion/pull/18403) (harshasiddartha) +- Change default `time_zone` to `None` (was `"+00:00"`) [#18359](https://github.com/apache/datafusion/pull/18359) 
(Omega359) +- Fix instances of "the the" to be "the" in comments/docs [#18478](https://github.com/apache/datafusion/pull/18478) (corasaurus-hex) +- Update roadmap links for DataFusion Q1 2026 [#18495](https://github.com/apache/datafusion/pull/18495) (alamb) +- Add a SpillingPool to manage collections of spill files [#18207](https://github.com/apache/datafusion/pull/18207) (adriangb) +- [branch-51] Update version to 51.0.0, add Changelog [#18551](https://github.com/apache/datafusion/pull/18551) (alamb) +- [branch-51] Revert rewrite for coalesce, `nvl` and `nvl2` simplification [#18567](https://github.com/apache/datafusion/pull/18567) (alamb) + +**Other:** + +- Extract complex default impls from AggregateUDFImpl trait [#17391](https://github.com/apache/datafusion/pull/17391) (findepi) +- chore: make `TableFunction` clonable [#17457](https://github.com/apache/datafusion/pull/17457) (sunng87) +- chore(deps): bump wasm-bindgen-test from 0.3.50 to 0.3.51 [#17470](https://github.com/apache/datafusion/pull/17470) (dependabot[bot]) +- chore(deps): bump log from 0.4.27 to 0.4.28 [#17471](https://github.com/apache/datafusion/pull/17471) (dependabot[bot]) +- Support csv truncated rows in datafusion [#17465](https://github.com/apache/datafusion/pull/17465) (zhuqi-lucas) +- chore(deps): bump indexmap from 2.11.0 to 2.11.1 [#17484](https://github.com/apache/datafusion/pull/17484) (dependabot[bot]) +- chore(deps): bump chrono from 0.4.41 to 0.4.42 [#17483](https://github.com/apache/datafusion/pull/17483) (dependabot[bot]) +- Improve `PartialEq`, `Eq` speed for `LexOrdering`, make `PartialEq` and `PartialOrd` consistent [#17442](https://github.com/apache/datafusion/pull/17442) (findepi) +- Fix array types coercion: preserve child element nullability for list types [#17306](https://github.com/apache/datafusion/pull/17306) (sgrebnov) +- better preserve statistics when applying limits [#17381](https://github.com/apache/datafusion/pull/17381) (adriangb) +- Refactor HashJoinExec to 
progressively accumulate dynamic filter bounds instead of computing them after data is accumulated [#17444](https://github.com/apache/datafusion/pull/17444) (adriangb) +- Fix `PartialOrd` for logical plan nodes and expressions [#17438](https://github.com/apache/datafusion/pull/17438) (findepi) +- chore(deps): bump sqllogictest from 0.28.3 to 0.28.4 [#17500](https://github.com/apache/datafusion/pull/17500) (dependabot[bot]) +- chore(deps): bump tempfile from 3.21.0 to 3.22.0 [#17499](https://github.com/apache/datafusion/pull/17499) (dependabot[bot]) +- refactor: Move `SMJ` tests into own file [#17495](https://github.com/apache/datafusion/pull/17495) (jonathanc-n) +- move MinAggregator and MaxAggregator to functions-aggregate-common [#17492](https://github.com/apache/datafusion/pull/17492) (adriangb) +- Update datafusion-testing pin to update expected output for extended tests [#17490](https://github.com/apache/datafusion/pull/17490) (alamb) +- update physical-plan to use datafusion-functions-aggregate-common for Min/MaxAccumulator [#17502](https://github.com/apache/datafusion/pull/17502) (adriangb) +- bug: Always use 'indent' format for explain verbose [#17481](https://github.com/apache/datafusion/pull/17481) (petern48) +- Fix ambiguous column names in substrait conversion as a result of literals having the same name during conversion. 
[#17299](https://github.com/apache/datafusion/pull/17299) (xanderbailey) +- Fix NULL Arithmetic Handling for Numerical Operators in Type Coercion [#17418](https://github.com/apache/datafusion/pull/17418) (etolbakov) +- Prepare for Merge Queue [#17183](https://github.com/apache/datafusion/pull/17183) (blaginin) +- bug: Support null as argument to to_local_time [#17491](https://github.com/apache/datafusion/pull/17491) (petern48) +- Implement timestamp_cast_dtype for SqliteDialect [#17479](https://github.com/apache/datafusion/pull/17479) (krinart) +- Disable `required_status_checks` for now [#17537](https://github.com/apache/datafusion/pull/17537) (blaginin) +- Update Bug issue template to use Bug issue type [#17540](https://github.com/apache/datafusion/pull/17540) (findepi) +- Fix predicate simplification for incompatible types in push_down_filter [#17521](https://github.com/apache/datafusion/pull/17521) (adriangb) +- Add assertion that ScalarUDFImpl implementation is consistent with declared return type [#17515](https://github.com/apache/datafusion/pull/17515) (findepi) +- Using `encode_arrow_schema` from arrow-rs. [#17543](https://github.com/apache/datafusion/pull/17543) (samueleresca) +- Add test for decimal256 and float math [#17530](https://github.com/apache/datafusion/pull/17530) (Jefffrey) +- Document how schema projection works. 
[#17250](https://github.com/apache/datafusion/pull/17250) (wiedld) +- chore(deps): bump rust_decimal from 1.37.2 to 1.38.0 [#17564](https://github.com/apache/datafusion/pull/17564) (dependabot[bot]) +- chore(deps): bump semver from 1.0.26 to 1.0.27 [#17566](https://github.com/apache/datafusion/pull/17566) (dependabot[bot]) +- Generalize struct-to-struct casting with CastOptions and SchemaAdapter integration [#17468](https://github.com/apache/datafusion/pull/17468) (kosiew) +- Add `TableProvider::scan_with_args` [#17336](https://github.com/apache/datafusion/pull/17336) (adriangb) +- Use taiki-e/install-action and binstall in CI [#17573](https://github.com/apache/datafusion/pull/17573) (AdamGS) +- Trying cargo machete to prune unused deps. [#17545](https://github.com/apache/datafusion/pull/17545) (samueleresca) +- Fix typo in error message in `substring.rs` [#17570](https://github.com/apache/datafusion/pull/17570) (AdamGS) +- chore(deps): bump taiki-e/install-action from 2.61.5 to 2.61.6 [#17586](https://github.com/apache/datafusion/pull/17586) (dependabot[bot]) +- datafusion/substrait: enable `unicode_expressions` in dev-dependencies to fix substring planning test [#17584](https://github.com/apache/datafusion/pull/17584) (kosiew) +- chore: replace deprecated UnionExec API [#17588](https://github.com/apache/datafusion/pull/17588) (etolbakov) +- minor: fix compilation issue for extended tests due to missing parquet encryption flag [#17579](https://github.com/apache/datafusion/pull/17579) (Jefffrey) +- Update release README for new `datafusion/physical-expr-adapter` crate [#17591](https://github.com/apache/datafusion/pull/17591) (xudong963) +- chore(deps): bump indexmap from 2.11.1 to 2.11.3 [#17587](https://github.com/apache/datafusion/pull/17587) (dependabot[bot]) +- chore(deps): bump serde_json from 1.0.143 to 1.0.145 [#17585](https://github.com/apache/datafusion/pull/17585) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.61.6 to 2.61.8 
[#17615](https://github.com/apache/datafusion/pull/17615) (dependabot[bot]) +- Always run CI checks [#17538](https://github.com/apache/datafusion/pull/17538) (blaginin) +- Revert "Always run CI checks" [#17629](https://github.com/apache/datafusion/pull/17629) (blaginin) +- Bump datafusion-testing to latest [#17609](https://github.com/apache/datafusion/pull/17609) (Jefffrey) +- Use `Display` formatting of `DataType`:s in error messages [#17565](https://github.com/apache/datafusion/pull/17565) (emilk) +- `avg(distinct)` support for decimal types [#17560](https://github.com/apache/datafusion/pull/17560) (Jefffrey) +- chore(deps): bump taiki-e/install-action from 2.61.8 to 2.61.9 [#17640](https://github.com/apache/datafusion/pull/17640) (dependabot[bot]) +- chore(deps): bump Swatinem/rust-cache from 2.8.0 to 2.8.1 [#17641](https://github.com/apache/datafusion/pull/17641) (dependabot[bot]) +- Validate the memory consumption in SPM created by multi level merge [#17029](https://github.com/apache/datafusion/pull/17029) (ding-young) +- fix(SubqueryAlias): use maybe_project_redundant_column [#17478](https://github.com/apache/datafusion/pull/17478) (notfilippo) +- minor: Ensure `datafusion-sql` package dependencies have `sql` flag [#17644](https://github.com/apache/datafusion/pull/17644) (Jefffrey) +- optimizer: Rewrite `IS NOT DISTINCT FROM` joins as Hash Joins [#17319](https://github.com/apache/datafusion/pull/17319) (2010YOUY01) +- chore(deps): bump serde from 1.0.223 to 1.0.225 [#17614](https://github.com/apache/datafusion/pull/17614) (dependabot[bot]) +- chore: Update dynamic filter formatting [#17647](https://github.com/apache/datafusion/pull/17647) (rkrishn7) +- chore(deps): bump taiki-e/install-action from 2.61.9 to 2.61.10 [#17660](https://github.com/apache/datafusion/pull/17660) (dependabot[bot]) +- proto: don't include parquet feature by default [#17577](https://github.com/apache/datafusion/pull/17577) (jackkleeman) +- minor: Ensure `proto` crate has datetime & 
unicode expr flags in datafusion dev dependency [#17656](https://github.com/apache/datafusion/pull/17656) (Jefffrey) +- chore(deps): bump indexmap from 2.11.3 to 2.11.4 [#17661](https://github.com/apache/datafusion/pull/17661) (dependabot[bot]) +- Support Decimal32/64 types [#17501](https://github.com/apache/datafusion/pull/17501) (AdamGS) +- minor: Improve hygiene for `datafusion-functions` macros [#17638](https://github.com/apache/datafusion/pull/17638) (Jefffrey) +- [unparser] Custom timestamp format for DuckDB [#17653](https://github.com/apache/datafusion/pull/17653) (krinart) +- Support LargeList for array_sort [#17657](https://github.com/apache/datafusion/pull/17657) (Jefffrey) +- Support FixedSizeList for array_except [#17658](https://github.com/apache/datafusion/pull/17658) (Jefffrey) +- chore: refactor array fn signatures & add more slt tests [#17672](https://github.com/apache/datafusion/pull/17672) (Jefffrey) +- Support FixedSizeList for array_to_string [#17666](https://github.com/apache/datafusion/pull/17666) (Jefffrey) +- minor: add SQLancer fuzzed SLT case for natural joins [#17683](https://github.com/apache/datafusion/pull/17683) (Jefffrey) +- chore: Upgrade Rust version to 1.90.0 [#17677](https://github.com/apache/datafusion/pull/17677) (rkrishn7) +- Support FixedSizeList for array_position [#17659](https://github.com/apache/datafusion/pull/17659) (Jefffrey) +- chore(deps): bump the proto group with 2 updates [#16806](https://github.com/apache/datafusion/pull/16806) (dependabot[bot]) +- chore: update a bunch of dependencies [#17708](https://github.com/apache/datafusion/pull/17708) (Jefffrey) +- Support FixedSizeList for array_slice via coercion to List [#17667](https://github.com/apache/datafusion/pull/17667) (Jefffrey) +- chore(deps): bump taiki-e/install-action from 2.61.10 to 2.62.1 [#17710](https://github.com/apache/datafusion/pull/17710) (dependabot[bot]) +- fix(agg/corr): return NULL when variance is zero or samples < 2 
[#17621](https://github.com/apache/datafusion/pull/17621) (killme2008) +- chore(deps): bump taiki-e/install-action from 2.62.1 to 2.62.4 [#17739](https://github.com/apache/datafusion/pull/17739) (dependabot[bot]) +- chore(deps): bump tempfile from 3.22.0 to 3.23.0 [#17741](https://github.com/apache/datafusion/pull/17741) (dependabot[bot]) +- chore: make `LimitPushPastWindows` public [#17736](https://github.com/apache/datafusion/pull/17736) (linhr) +- minor: create `OptimizerContext` with provided `ConfigOptions` [#17742](https://github.com/apache/datafusion/pull/17742) (MichaelScofield) +- Add support for calling async UDF as aggregation expression [#17620](https://github.com/apache/datafusion/pull/17620) (simonvandel) +- chore(deps): bump taiki-e/install-action from 2.62.4 to 2.62.5 [#17750](https://github.com/apache/datafusion/pull/17750) (dependabot[bot]) +- (fix): Lag function creates unwanted projection (#17630) [#17639](https://github.com/apache/datafusion/pull/17639) (renato2099) +- Support `LargeList` in `array_has` simplification to `InList` [#17732](https://github.com/apache/datafusion/pull/17732) (Jefffrey) +- chore(deps): bump wasm-bindgen-test from 0.3.51 to 0.3.53 [#17642](https://github.com/apache/datafusion/pull/17642) (dependabot[bot]) +- chore(deps): bump object_store from 0.12.3 to 0.12.4 [#17753](https://github.com/apache/datafusion/pull/17753) (dependabot[bot]) +- Update `arrow` / `parquet` to 56.2.0 [#17631](https://github.com/apache/datafusion/pull/17631) (alamb) +- chore(deps): bump taiki-e/install-action from 2.62.5 to 2.62.6 [#17766](https://github.com/apache/datafusion/pull/17766) (dependabot[bot]) +- Keep aggregate udaf schema names unique when missing an order-by [#17731](https://github.com/apache/datafusion/pull/17731) (wiedld) +- feat : Display function alias in output column name [#17690](https://github.com/apache/datafusion/pull/17690) (devampatel03) +- Support join cardinality estimation less conservatively 
[#17476](https://github.com/apache/datafusion/pull/17476) (jackkleeman) +- chore(deps): bump libc from 0.2.175 to 0.2.176 [#17767](https://github.com/apache/datafusion/pull/17767) (dependabot[bot]) +- chore(deps): bump postgres-types from 0.2.9 to 0.2.10 [#17768](https://github.com/apache/datafusion/pull/17768) (dependabot[bot]) +- Use `Expr::qualified_name()` and `Column::new()` to extract partition keys from window and aggregate operators [#17757](https://github.com/apache/datafusion/pull/17757) (masonh22) +- chore(deps): bump taiki-e/install-action from 2.62.6 to 2.62.8 [#17781](https://github.com/apache/datafusion/pull/17781) (dependabot[bot]) +- chore(deps): bump wasm-bindgen-test from 0.3.53 to 0.3.54 [#17784](https://github.com/apache/datafusion/pull/17784) (dependabot[bot]) +- chore: Action some old TODOs in github actions [#17694](https://github.com/apache/datafusion/pull/17694) (Jefffrey) +- dev: Add benchmark for compilation profiles [#17754](https://github.com/apache/datafusion/pull/17754) (2010YOUY01) +- chore(deps): bump tokio-postgres from 0.7.13 to 0.7.14 [#17785](https://github.com/apache/datafusion/pull/17785) (dependabot[bot]) +- chore(deps): bump serde from 1.0.226 to 1.0.227 [#17783](https://github.com/apache/datafusion/pull/17783) (dependabot[bot]) +- chore(deps): bump regex from 1.11.2 to 1.11.3 [#17782](https://github.com/apache/datafusion/pull/17782) (dependabot[bot]) +- Test `CAST` from temporal to `Utf8View` [#17535](https://github.com/apache/datafusion/pull/17535) (findepi) +- chore: dependabot to run weekly [#17797](https://github.com/apache/datafusion/pull/17797) (comphead) +- chore(deps): bump sysinfo from 0.37.0 to 0.37.1 [#17800](https://github.com/apache/datafusion/pull/17800) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.62.8 to 2.62.9 [#17799](https://github.com/apache/datafusion/pull/17799) (dependabot[bot]) +- Fix potential overflow when we print verbose physical plan 
[#17798](https://github.com/apache/datafusion/pull/17798) (zhuqi-lucas) +- Extend datatype semantic equality check to include timestamps [#17777](https://github.com/apache/datafusion/pull/17777) (shivbhatia10) +- dev: Add Apache license check to the lint script [#17787](https://github.com/apache/datafusion/pull/17787) (2010YOUY01) +- Fix: common_sub_expression_eliminate optimizer rule failed [#16066](https://github.com/apache/datafusion/pull/16066) (Col-Waltz) +- chore: remove dialect fixes in SLT tests that are outdated [#17807](https://github.com/apache/datafusion/pull/17807) (Jefffrey) +- chore(deps): bump thiserror from 2.0.16 to 2.0.17 [#17821](https://github.com/apache/datafusion/pull/17821) (dependabot[bot]) +- chore(deps): bump quote from 1.0.40 to 1.0.41 [#17822](https://github.com/apache/datafusion/pull/17822) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.62.9 to 2.62.12 [#17823](https://github.com/apache/datafusion/pull/17823) (dependabot[bot]) +- chore(deps): bump serde from 1.0.227 to 1.0.228 [#17827](https://github.com/apache/datafusion/pull/17827) (dependabot[bot]) +- Temporarily disable failing `sql_planner` benchmark query [#17809](https://github.com/apache/datafusion/pull/17809) (alamb) +- chore(deps): bump taiki-e/install-action from 2.62.12 to 2.62.13 [#17836](https://github.com/apache/datafusion/pull/17836) (dependabot[bot]) +- More decimal 32/64 support - type coercsion and misc gaps [#17808](https://github.com/apache/datafusion/pull/17808) (AdamGS) +- Implement `AsRef` for `Expr` [#17819](https://github.com/apache/datafusion/pull/17819) (findepi) +- chore(deps): bump taiki-e/install-action from 2.62.13 to 2.62.14 [#17840](https://github.com/apache/datafusion/pull/17840) (dependabot[bot]) +- chore(deps): bump petgraph from 0.8.2 to 0.8.3 [#17842](https://github.com/apache/datafusion/pull/17842) (dependabot[bot]) +- Relax constraint that file sort order must only reference individual columns 
[#17419](https://github.com/apache/datafusion/pull/17419) (pepijnve) +- minor: Include consumer name in OOM message [#17870](https://github.com/apache/datafusion/pull/17870) (andygrove) +- Implement `partition_statistics` API for `InterleaveExec` [#17051](https://github.com/apache/datafusion/pull/17051) (liamzwbao) +- Add `CastColumnExpr` for struct-aware column casting [#17773](https://github.com/apache/datafusion/pull/17773) (kosiew) +- chore(deps): bump taiki-e/install-action from 2.62.14 to 2.62.16 [#17879](https://github.com/apache/datafusion/pull/17879) (dependabot[bot]) +- chore(deps): bump crate-ci/typos from 1.37.0 to 1.37.1 [#17878](https://github.com/apache/datafusion/pull/17878) (dependabot[bot]) +- Fix failing CI caused by hash collisions [#17886](https://github.com/apache/datafusion/pull/17886) (liamzwbao) +- Minor: reuse test schemas in simplify tests [#17864](https://github.com/apache/datafusion/pull/17864) (alamb) +- Make limit pushdown work for SortPreservingMergeExec [#17893](https://github.com/apache/datafusion/pull/17893) (Dandandan) +- chore(deps): bump taiki-e/install-action from 2.62.16 to 2.62.17 [#17896](https://github.com/apache/datafusion/pull/17896) (dependabot[bot]) +- Consolidate `apply_schema_adapter_tests` [#17905](https://github.com/apache/datafusion/pull/17905) (alamb) +- Improve `InListExpr` plan display [#17884](https://github.com/apache/datafusion/pull/17884) (pepijnve) +- Export JoinSetTracerError from datafusion-common-runtime [#17877](https://github.com/apache/datafusion/pull/17877) (JanKaul) +- Clippy to `extended_tests` [#17922](https://github.com/apache/datafusion/pull/17922) (blaginin) +- chore: rename Schema `print_schema_tree` to `tree_string` [#17919](https://github.com/apache/datafusion/pull/17919) (comphead) +- chore: utilize trait upcasting for AsyncScalarUDF PartialEq & Hash [#17872](https://github.com/apache/datafusion/pull/17872) (Jefffrey) +- Refactor: Update enforce_sorting tests to use insta snapshots for 
easier updates [#17900](https://github.com/apache/datafusion/pull/17900) (alamb) +- chore(deps): bump flate2 from 1.1.2 to 1.1.4 [#17938](https://github.com/apache/datafusion/pull/17938) (dependabot[bot]) +- chore(deps): bump actions/stale from 10.0.0 to 10.1.0 [#17937](https://github.com/apache/datafusion/pull/17937) (dependabot[bot]) +- chore(deps): bump aws-credential-types from 1.2.6 to 1.2.7 [#17936](https://github.com/apache/datafusion/pull/17936) (dependabot[bot]) +- chore(deps): bump rustyline from 17.0.1 to 17.0.2 [#17932](https://github.com/apache/datafusion/pull/17932) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.62.17 to 2.62.21 [#17934](https://github.com/apache/datafusion/pull/17934) (dependabot[bot]) +- chore(deps): bump crate-ci/typos from 1.37.1 to 1.37.2 [#17935](https://github.com/apache/datafusion/pull/17935) (dependabot[bot]) +- chore: upgrade sqlparser [#17925](https://github.com/apache/datafusion/pull/17925) (chenkovsky) +- minor: impl Clone and Debug on CaseBuilder [#17927](https://github.com/apache/datafusion/pull/17927) (timsaucer) +- chore: Extend backtrace coverage for `Execution` and `Internal` errors [#17921](https://github.com/apache/datafusion/pull/17921) (comphead) +- chore(deps): bump taiki-e/install-action from 2.62.21 to 2.62.22 [#17949](https://github.com/apache/datafusion/pull/17949) (dependabot[bot]) +- chore(deps): bump crate-ci/typos from 1.37.2 to 1.38.0 [#17948](https://github.com/apache/datafusion/pull/17948) (dependabot[bot]) +- Feat: [datafusion-spark] Migrate avg from comet to datafusion-spark and add tests. 
[#17871](https://github.com/apache/datafusion/pull/17871) (codetyri0n) +- Update tests to use insta / make them easier to update [#17945](https://github.com/apache/datafusion/pull/17945) (alamb) +- Minor Test refactor: avoid creating the same SchemaRef [#17951](https://github.com/apache/datafusion/pull/17951) (alamb) +- Precision::::{add, sub, multiply}: avoid overflows [#17929](https://github.com/apache/datafusion/pull/17929) (Tpt) +- Resolve `ListingScan` projection against table schema including partition columns [#17911](https://github.com/apache/datafusion/pull/17911) (mach-kernel) +- chore(deps): bump crate-ci/typos from 1.38.0 to 1.38.1 [#17960](https://github.com/apache/datafusion/pull/17960) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.62.22 to 2.62.23 [#17959](https://github.com/apache/datafusion/pull/17959) (dependabot[bot]) +- bench: fix `vectorized_equal_to` bench mutated between iterations [#17968](https://github.com/apache/datafusion/pull/17968) (rluvaton) +- fix docs and broken example from #17956 [#17980](https://github.com/apache/datafusion/pull/17980) (adriangb) +- Refactor: Update `replace_with_order_preserving_variants` tests to use insta snapshots for easier updates [#17962](https://github.com/apache/datafusion/pull/17962) (blaginin) +- Support repartitioned() method in RepartitionExec [#17990](https://github.com/apache/datafusion/pull/17990) (gabotechs) +- Adds Instrumented Object Store to CLI [#17984](https://github.com/apache/datafusion/pull/17984) (BlakeOrth) +- Migrate `join_selection` tests to snapshot-based testing [#17974](https://github.com/apache/datafusion/pull/17974) (blaginin) +- bench: fix actually generate a lot of unique values in benchmark table [#17967](https://github.com/apache/datafusion/pull/17967) (rluvaton) +- Adds Instrument Mode for InstrumentedObjectStore in datafusion-cli [#18000](https://github.com/apache/datafusion/pull/18000) (BlakeOrth) +- minor: refactor Spark ascii function to reuse 
DataFusion ascii function code [#17965](https://github.com/apache/datafusion/pull/17965) (Jefffrey) +- chore(deps): bump taiki-e/install-action from 2.62.23 to 2.62.24 [#17989](https://github.com/apache/datafusion/pull/17989) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.62.24 to 2.62.25 [#18007](https://github.com/apache/datafusion/pull/18007) (dependabot[bot]) +- Clarify documentation that ScalarUDFImpl::simplity must not change the schema [#17981](https://github.com/apache/datafusion/pull/17981) (alamb) +- Expose trace_future and trace_block outside of common-runtime [#17976](https://github.com/apache/datafusion/pull/17976) (AdamGS) +- Adds instrumentation to get requests for datafusion-cli [#18016](https://github.com/apache/datafusion/pull/18016) (BlakeOrth) +- chore(deps): bump half from 2.6.0 to 2.7.0 [#18036](https://github.com/apache/datafusion/pull/18036) (dependabot[bot]) +- chore(deps): bump aws-config from 1.8.6 to 1.8.7 [#18038](https://github.com/apache/datafusion/pull/18038) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.62.25 to 2.62.28 [#18037](https://github.com/apache/datafusion/pull/18037) (dependabot[bot]) +- refactor: cleanup naming and macro usages for binary operator [#17985](https://github.com/apache/datafusion/pull/17985) (sunng87) +- Impl `gather_filters_for_pushdown` for `CoalescePartitionsExec` [#18046](https://github.com/apache/datafusion/pull/18046) (xudong963) +- Fix bug in LimitPushPastWindows [#18029](https://github.com/apache/datafusion/pull/18029) (avantgardnerio) +- Fix `SortPreservingMergeExec` tree formatting with limit [#18009](https://github.com/apache/datafusion/pull/18009) (AdamGS) +- chore(deps): bump actions/setup-node from 5.0.0 to 6.0.0 [#18049](https://github.com/apache/datafusion/pull/18049) (dependabot[bot]) +- chore(deps): bump sysinfo from 0.37.1 to 0.37.2 [#18035](https://github.com/apache/datafusion/pull/18035) (dependabot[bot]) +- FileScanConfig: Preserve schema 
metadata across ser/de boundary [#17966](https://github.com/apache/datafusion/pull/17966) (mach-kernel) +- physical-plan: push filters down to UnionExec children [#18054](https://github.com/apache/datafusion/pull/18054) (asubiotto) +- Add `min_max_bytes` benchmark (Reproduce quadratic runtime in min_max_bytes) [#18041](https://github.com/apache/datafusion/pull/18041) (ctsk) +- Adds summary output to CLI instrumented object stores [#18045](https://github.com/apache/datafusion/pull/18045) (BlakeOrth) +- Impl spark bit not function [#18018](https://github.com/apache/datafusion/pull/18018) (kazantsev-maksim) +- chore: revert tests [#18065](https://github.com/apache/datafusion/pull/18065) (comphead) +- chore: Use an enum to express the different kinds of nullability in an array [#18048](https://github.com/apache/datafusion/pull/18048) (martin-g) +- chore(deps): bump taiki-e/install-action from 2.62.28 to 2.62.29 [#18069](https://github.com/apache/datafusion/pull/18069) (dependabot[bot]) +- Split up monster test_window_partial_constant_and_set_monotonicity into smaller functions [#17952](https://github.com/apache/datafusion/pull/17952) (alamb) +- Push Down Filter Subexpressions in Nested Loop Joins as Projections [#17906](https://github.com/apache/datafusion/pull/17906) (tobixdev) +- ci: Use PR description for merge commit body in squash merges [#18027](https://github.com/apache/datafusion/pull/18027) (Weijun-H) +- Fix extended tests on main to get CI green [#18096](https://github.com/apache/datafusion/pull/18096) (alamb) +- chore(deps): bump taiki-e/install-action from 2.62.29 to 2.62.31 [#18094](https://github.com/apache/datafusion/pull/18094) (dependabot[bot]) +- chore: run extended suite on PRs for critical areas [#18088](https://github.com/apache/datafusion/pull/18088) (comphead) +- chore(deps): bump taiki-e/install-action from 2.62.31 to 2.62.33 [#18113](https://github.com/apache/datafusion/pull/18113) (dependabot[bot]) +- chore: remove unnecessary 
`skip_failed_rules` config in slt [#18117](https://github.com/apache/datafusion/pull/18117) (Jefffrey) +- Refactor repartition to use `insta` [#18106](https://github.com/apache/datafusion/pull/18106) (blaginin) +- refactor: move ListingTable over to the catalog-listing-table crate [#18080](https://github.com/apache/datafusion/pull/18080) (timsaucer) +- refactor: move arrow datasource to new `datafusion-datasource-arrow` crate [#18082](https://github.com/apache/datafusion/pull/18082) (timsaucer) +- Adds instrumentation to LIST operations in CLI [#18103](https://github.com/apache/datafusion/pull/18103) (BlakeOrth) +- Add extra case_when benchmarks [#18097](https://github.com/apache/datafusion/pull/18097) (pepijnve) +- Adds instrumentation to delimited LIST operations in CLI [#18134](https://github.com/apache/datafusion/pull/18134) (BlakeOrth) +- test: `to_timestamp(double)` for vectorized input [#18147](https://github.com/apache/datafusion/pull/18147) (dqkqd) +- Fix `concat_elements_utf8view` capacity initialization. 
[#18003](https://github.com/apache/datafusion/pull/18003) (samueleresca) +- Use < instead of = in case benchmark predicates, use Integers [#18144](https://github.com/apache/datafusion/pull/18144) (pepijnve) +- Adds instrumentation to PUT ops in the CLI [#18139](https://github.com/apache/datafusion/pull/18139) (BlakeOrth) +- [main] chore: Fix `no space left on device` (#18141) [#18151](https://github.com/apache/datafusion/pull/18151) (alamb) +- Fix `DISTINCT ON` for tables with no columns (ReplaceDistinctWithAggregate: do not fail when on input without columns) [#18133](https://github.com/apache/datafusion/pull/18133) (Tpt) +- Fix quadratic runtime in min_max_bytes [#18044](https://github.com/apache/datafusion/pull/18044) (ctsk) +- chore(deps): bump getrandom from 0.3.3 to 0.3.4 [#18163](https://github.com/apache/datafusion/pull/18163) (dependabot[bot]) +- chore(deps): bump tokio from 1.47.1 to 1.48.0 [#18164](https://github.com/apache/datafusion/pull/18164) (dependabot[bot]) +- chore(deps): bump indexmap from 2.11.4 to 2.12.0 [#18162](https://github.com/apache/datafusion/pull/18162) (dependabot[bot]) +- chore(deps): bump bzip2 from 0.6.0 to 0.6.1 [#18165](https://github.com/apache/datafusion/pull/18165) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.62.33 to 2.62.34 [#18194](https://github.com/apache/datafusion/pull/18194) (dependabot[bot]) +- Fix COPY TO does not produce an output file for the empty set [#18074](https://github.com/apache/datafusion/pull/18074) (bert-beyondloops) +- Add Projection struct w/ helper methods to manipulate projections [#18176](https://github.com/apache/datafusion/pull/18176) (adriangb) +- Add TableSchema helper to encapsulate file schema + partition fields [#18178](https://github.com/apache/datafusion/pull/18178) (adriangb) +- Add spilling to RepartitionExec [#18014](https://github.com/apache/datafusion/pull/18014) (adriangb) +- Adds DELETE and HEAD instrumentation to CLI 
[#18206](https://github.com/apache/datafusion/pull/18206) (BlakeOrth) +- [branch-50] Prepare 50.3.0 release version number and README (#18173) [#18182](https://github.com/apache/datafusion/pull/18182) (alamb) +- Fix array_has simplification with null argument [#18186](https://github.com/apache/datafusion/pull/18186) (joroKr21) +- chore(deps): bump taiki-e/install-action from 2.62.34 to 2.62.35 [#18215](https://github.com/apache/datafusion/pull/18215) (dependabot[bot]) +- bench: create benchmark for lookup table like `CASE WHEN` [#18203](https://github.com/apache/datafusion/pull/18203) (rluvaton) +- Adds instrumentation to COPY operations in the CLI [#18227](https://github.com/apache/datafusion/pull/18227) (BlakeOrth) +- Consolidate core_integration/datasource and rename parquet_source --> parquet_integration [#18226](https://github.com/apache/datafusion/pull/18226) (alamb) +- CoalescePartitionsExec fetch is not consistent with one partition and more than one partition [#18245](https://github.com/apache/datafusion/pull/18245) (zhuqi-lucas) +- Migrate core test to insta part 3 [#16978](https://github.com/apache/datafusion/pull/16978) (Chen-Yuan-Lai) +- chore(deps): bump taiki-e/install-action from 2.62.35 to 2.62.36 [#18240](https://github.com/apache/datafusion/pull/18240) (dependabot[bot]) +- Fix: Do not normalize table names when deserializing from protobuf [#18187](https://github.com/apache/datafusion/pull/18187) (drin) +- Revert "chore: revert tests (#18065)" [#18255](https://github.com/apache/datafusion/pull/18255) (dqkqd) +- Refactor `nvl2` Function to Support Lazy Evaluation and Simplification via CASE Expression [#18191](https://github.com/apache/datafusion/pull/18191) (kosiew) +- fix null count stats computation [#18276](https://github.com/apache/datafusion/pull/18276) (adriangb) +- Improve docs and examples for `DataTypeExt` and `FieldExt` [#18271](https://github.com/apache/datafusion/pull/18271) (alamb) +- Easier construction of ScalarAndMetadata 
[#18272](https://github.com/apache/datafusion/pull/18272) (alamb) +- Add integration test for IO operations for listing tables queries [#18229](https://github.com/apache/datafusion/pull/18229) (alamb) +- Fix: Error rather than silently ignore extra parameter passed to ceil/floor [#18265](https://github.com/apache/datafusion/pull/18265) (toxicteddy00077) +- chore(deps): Update `half` to 2.7.1, ignore `RUSTSEC-2025-0111` [#18287](https://github.com/apache/datafusion/pull/18287) (alamb) +- chore(deps): bump taiki-e/install-action from 2.62.36 to 2.62.38 [#18293](https://github.com/apache/datafusion/pull/18293) (dependabot[bot]) +- chore(deps): bump regex from 1.11.3 to 1.12.2 [#18294](https://github.com/apache/datafusion/pull/18294) (dependabot[bot]) +- chore(deps): bump clap from 4.5.48 to 4.5.50 [#18292](https://github.com/apache/datafusion/pull/18292) (dependabot[bot]) +- chore(deps): bump syn from 2.0.106 to 2.0.108 [#18291](https://github.com/apache/datafusion/pull/18291) (dependabot[bot]) +- Enforce unique names for `is_set` on `first_value` and `last_value` [#18303](https://github.com/apache/datafusion/pull/18303) (marc-pydantic) +- chore(deps): update testcontainers to `0.25.2` and drop ignore of `RUSTSEC-2025-0111` [#18305](https://github.com/apache/datafusion/pull/18305) (DDtKey) +- Using `try_append_value` from arrow-rs 57.0.0 [#18313](https://github.com/apache/datafusion/pull/18313) (samueleresca) +- minor: Add documentation to function `concat_elements_utf8view` [#18316](https://github.com/apache/datafusion/pull/18316) (2010YOUY01) +- chore(deps): bump taiki-e/install-action from 2.62.38 to 2.62.40 [#18318](https://github.com/apache/datafusion/pull/18318) (dependabot[bot]) +- Fix: Add projection to generate_series [#18298](https://github.com/apache/datafusion/pull/18298) (mkleen) +- Do not accept null is_set for first_value/last_value [#18301](https://github.com/apache/datafusion/pull/18301) (marc-pydantic) +- Optimize merging of partial case expression 
results [#18152](https://github.com/apache/datafusion/pull/18152) (pepijnve) +- chore: Format examples in doc strings - execution [#18339](https://github.com/apache/datafusion/pull/18339) (CuteChuanChuan) +- chore: Format examples in doc strings - common [#18336](https://github.com/apache/datafusion/pull/18336) (CuteChuanChuan) +- chore: Format examples in doc strings - crate datafusion [#18333](https://github.com/apache/datafusion/pull/18333) (CuteChuanChuan) +- chore: Format examples in doc strings - expr [#18340](https://github.com/apache/datafusion/pull/18340) (CuteChuanChuan) +- chore: Format examples in doc strings - datasource crates [#18338](https://github.com/apache/datafusion/pull/18338) (CuteChuanChuan) +- Insta for enforce_distrubution (easy ones) [#18248](https://github.com/apache/datafusion/pull/18248) (blaginin) +- chore: Format examples in doc strings - macros and optmizer [#18354](https://github.com/apache/datafusion/pull/18354) (CuteChuanChuan) +- chore: Format examples in doc strings - proto, pruning, and session [#18358](https://github.com/apache/datafusion/pull/18358) (CuteChuanChuan) +- chore: Format examples in doc strings - catalog listing [#18335](https://github.com/apache/datafusion/pull/18335) (CuteChuanChuan) +- ci: fix temporary file creation in tests and tighten CI check [#18374](https://github.com/apache/datafusion/pull/18374) (2010YOUY01) +- Run extended tests when there are changes to datafusion-testing pin [#18310](https://github.com/apache/datafusion/pull/18310) (alamb) +- Add simple unit test for `merge` in case expression [#18369](https://github.com/apache/datafusion/pull/18369) (pepijnve) +- chore(deps): bump taiki-e/install-action from 2.62.40 to 2.62.41 [#18377](https://github.com/apache/datafusion/pull/18377) (dependabot[bot]) +- Refactor `range`/`gen_series` signature away from user defined [#18317](https://github.com/apache/datafusion/pull/18317) (Jefffrey) +- Adds Partitioned CSV test to object store access tests 
[#18370](https://github.com/apache/datafusion/pull/18370) (BlakeOrth) +- Add reproducer for consecutive RepartitionExec [#18343](https://github.com/apache/datafusion/pull/18343) (NGA-TRAN) +- chore: bump substrait version to `0.60.0` to use substrait spec v0.75.0 [#17866](https://github.com/apache/datafusion/pull/17866) (benbellick) +- Use the upstream arrow-rs coalesce kernel [#17193](https://github.com/apache/datafusion/pull/17193) (zhuqi-lucas) +- Extract out super slow planning benchmark to its own benchmark [#18388](https://github.com/apache/datafusion/pull/18388) (Omega359) +- minor: Fix parquet pruning metrics display order [#18379](https://github.com/apache/datafusion/pull/18379) (2010YOUY01) +- chore: use enum as `date_trunc` granularity [#18390](https://github.com/apache/datafusion/pull/18390) (comphead) +- chore(deps): bump taiki-e/install-action from 2.62.41 to 2.62.43 [#18398](https://github.com/apache/datafusion/pull/18398) (dependabot[bot]) +- Project record batches to avoid filtering unused columns in `CASE` evaluation [#18329](https://github.com/apache/datafusion/pull/18329) (pepijnve) +- catch errors when simplifying cast(lit(...), ...) and bubble those up [#18332](https://github.com/apache/datafusion/pull/18332) (adriangb) +- Align `NowFunc::new()` with canonical `ConfigOptions` timezone and enhance documentation [#18347](https://github.com/apache/datafusion/pull/18347) (kosiew) +- chore: Format examples in doc strings - physical expr, optimizer, and plan [#18357](https://github.com/apache/datafusion/pull/18357) (CuteChuanChuan) +- Fix: spark bit_count function [#18322](https://github.com/apache/datafusion/pull/18322) (kazantsev-maksim) +- chore: bump workspace rust version to 1.91.0 [#18422](https://github.com/apache/datafusion/pull/18422) (randyli) +- Minor: Remove unnecessary vec! 
in SortMergeJoinStream initialization [#18430](https://github.com/apache/datafusion/pull/18430) (mapleFU) +- minor: refactor array reverse internals [#18445](https://github.com/apache/datafusion/pull/18445) (Jefffrey) +- chore(deps): bump taiki-e/install-action from 2.62.43 to 2.62.45 [#18465](https://github.com/apache/datafusion/pull/18465) (dependabot[bot]) +- chore(deps): bump crate-ci/typos from 1.38.1 to 1.39.0 [#18464](https://github.com/apache/datafusion/pull/18464) (dependabot[bot]) +- chore(deps): bump rstest from 0.25.0 to 0.26.1 [#18463](https://github.com/apache/datafusion/pull/18463) (dependabot[bot]) +- chore(deps): bump wasm-bindgen-test from 0.3.54 to 0.3.55 [#18462](https://github.com/apache/datafusion/pull/18462) (dependabot[bot]) +- chore(deps): bump postgres-types from 0.2.10 to 0.2.11 [#18461](https://github.com/apache/datafusion/pull/18461) (dependabot[bot]) +- chore(deps): bump ctor from 0.4.3 to 0.6.1 [#18460](https://github.com/apache/datafusion/pull/18460) (dependabot[bot]) +- chore(deps): bump libc from 0.2.176 to 0.2.177 [#18459](https://github.com/apache/datafusion/pull/18459) (dependabot[bot]) +- chore: Format examples in doc strings - functions [#18353](https://github.com/apache/datafusion/pull/18353) (CuteChuanChuan) +- Feat: Support array flatten() on `List(LargeList(_))` types [#18363](https://github.com/apache/datafusion/pull/18363) (sdf-jkl) +- Reproducer tests for #18380 (resorting sorted inputs) [#18352](https://github.com/apache/datafusion/pull/18352) (rgehan) +- Update criterion to 0.7.\* [#18472](https://github.com/apache/datafusion/pull/18472) (Omega359) +- chore(deps): bump taiki-e/install-action from 2.62.45 to 2.62.46 [#18484](https://github.com/apache/datafusion/pull/18484) (dependabot[bot]) +- Consolidate flight examples (#18142) [#18442](https://github.com/apache/datafusion/pull/18442) (cj-zhukov) +- Support reverse for ListView [#18424](https://github.com/apache/datafusion/pull/18424) (vegarsti) +- Complete migrating 
`enforce_distribution` tests to insta [#18185](https://github.com/apache/datafusion/pull/18185) (blaginin) +- Add benchmark for array_reverse [#18425](https://github.com/apache/datafusion/pull/18425) (vegarsti) +- chore: simplify map const [#18440](https://github.com/apache/datafusion/pull/18440) (chenkovsky) +- Fix an out of date comment for `snapshot_physical_expr` [#18498](https://github.com/apache/datafusion/pull/18498) (AdamGS) +- Disable `parquet_encryption` by default in datafusion-sqllogictests [#18492](https://github.com/apache/datafusion/pull/18492) (zhuqi-lucas) +- Make extended test to use optional parquet_encryption feature [#18507](https://github.com/apache/datafusion/pull/18507) (zhuqi-lucas) +- Consolidate udf examples (#18142) [#18493](https://github.com/apache/datafusion/pull/18493) (cj-zhukov) +- test: add prepare alias slt test [#18522](https://github.com/apache/datafusion/pull/18522) (dqkqd) +- CI: add `clippy::needless_pass_by_value` rule [#18468](https://github.com/apache/datafusion/pull/18468) (2010YOUY01) +- Refactor create_hashes to accept array references [#18448](https://github.com/apache/datafusion/pull/18448) (adriangb) +- chore: Format examples in doc strings - spark, sql, sqllogictest, substrait [#18443](https://github.com/apache/datafusion/pull/18443) (CuteChuanChuan) +- refactor: simplify `calculate_binary_math` in datafusion-functions [#18525](https://github.com/apache/datafusion/pull/18525) (Jefffrey) +- ci: enforce needless_pass_by_value for datafusion-optimizer [#18533](https://github.com/apache/datafusion/pull/18533) (jizezhang) +- Add comments to Cargo.toml about workspace overrides [#18526](https://github.com/apache/datafusion/pull/18526) (alamb) +- minor: Remove inconsistent comment [#18539](https://github.com/apache/datafusion/pull/18539) (2010YOUY01) +- Refactor `log()` signature to use coercion API + fixes [#18519](https://github.com/apache/datafusion/pull/18519) (Jefffrey) +- [branch-51] Update Changelog 
[#18592](https://github.com/apache/datafusion/pull/18592) (alamb) +- [branch-51] bugfix: correct regression on TableType in into_view in DF51 [#18618](https://github.com/apache/datafusion/pull/18618) (timsaucer) +- [branch-51]: Add timezone to date_trunc fast path (#18596) [#18629](https://github.com/apache/datafusion/pull/18629) (hareshkh) +- [branch-51] bugfix: select_columns should validate column names [#18624](https://github.com/apache/datafusion/pull/18624) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 88 dependabot[bot] + 49 Jeffrey Vo + 35 Andrew Lamb + 20 Yongting You + 19 Adrian Garcia Badaracco + 14 Blake Orth + 12 Pepijn Van Eeckhoudt + 12 Piotr Findeisen + 11 Chen Chongchen + 11 Dmitrii Blaginin + 11 Yu-Chuan Hung + 9 Jonathan Chen + 9 Khanh Duong + 9 Oleks V + 9 Peter Nguyen + 8 Alex Huang + 8 Qi Zhu + 8 Raz Luvaton + 7 Adam Gutglick + 7 Rohan Krishnaswamy + 7 Tim Saucer + 7 kosiew + 6 xudong.w + 5 Nuno Faria + 4 Dhanush + 4 Samuele Resca + 4 Simon Vandel Sillesen + 4 Sriram Sundar + 4 Vegard Stikbakke + 3 Bruce Ritchie + 3 David López + 3 EeshanBembi + 3 Jack Kleeman + 3 Kazantsev Maksim + 3 Marko Milenković + 3 Thomas Tanon + 2 Andy Grove + 2 Bruno Volpato + 2 Christian + 2 Colin Marc + 2 Cora Sutton + 2 David Stancu + 2 Devam Patel + 2 Eugene Tolbakov + 2 Evgenii Glotov + 2 Kristin Cowalcijk + 2 Liam Bao + 2 Marc Brinkmann + 2 Michael Kleen + 2 Namgung Chan + 2 Ning Sun + 2 Randy + 2 Sergey Zhukov + 2 Viktor Yershov + 2 bubulalabu + 2 dennis zhuang + 2 jizezhang + 2 wiedld + 1 Ahmed Mezghani + 1 Aldrin M + 1 Alfonso Subiotto Marqués + 1 Anders + 1 Artem Medvedev + 1 Aryamaan Singh + 1 Ben Bellick + 1 Berkay Şahin + 1 Bert Vermeiren + 1 Brent Gardner + 1 Christopher Watford + 1 Dan Lovell + 1 Daniël Heres + 1 Dewey Dunnington + 1 Douglas Anderson + 1 Duong Cong Toai + 1 Emil Ernerfeldt + 1 Emily Matheys + 1 Enrico La Sala + 1 Eshed Schacham + 
1 Filippo Rossi + 1 Gabriel + 1 Gene Bordegaray + 1 Georgi Krastev + 1 Haresh Khanna + 1 Heran Lin + 1 Hiroaki Yutani + 1 Ian Lai + 1 Ilya Ostanevich + 1 JanKaul + 1 Kosta Tarasov + 1 LFC + 1 Leonardo Yvens + 1 Lía Adriana + 1 Manasa Manoj + 1 Martin + 1 Martin Grigorov + 1 Martin Hilton + 1 Mason + 1 Matt Butrovich + 1 Matthew Kim + 1 Matthijs Brobbel + 1 Nga Tran + 1 Nihal Rajak + 1 Rafael Fernández + 1 Renan GEHAN + 1 Renato Marroquin + 1 Rok Mihevc + 1 Ruilei Ma + 1 Sai Mahendra + 1 Sergei Grebnov + 1 Shiv Bhatia + 1 Tobias Schwarzinger + 1 UBarney + 1 Victor Barua + 1 Victorien + 1 Vyquos + 1 Weston Pace + 1 XL Liang + 1 Xander + 1 Zhen Wang + 1 aditya singh rathore + 1 dario curreri + 1 ding-young + 1 feniljain + 1 gene-bordegaray + 1 harshasiddartha + 1 mwish + 1 peasee + 1 r1b + 1 theirix +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 9f2a3c608508..6e5e063a1292 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -98,7 +98,7 @@ The following configuration settings are available: | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes | | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. 
| -| datafusion.execution.parquet.created_by | datafusion version 50.3.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 51.0.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.statistics_truncate_length | 64 | (writing) Sets statistics truncate length. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 7c88d1fd9c3e..6df14d13ee40 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1056,7 +1056,7 @@ nullif(expression1, expression2) ### `nvl` -Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_ and _expression2_ is not evaluated. This function can be used to substitute a default value for NULL values. +Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_. ```sql nvl(expression1, expression2)