diff --git a/.asf.yaml b/.asf.yaml index 43c9250c2b826..805bb52456f40 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -65,6 +65,9 @@ github: branch-52: required_pull_request_reviews: required_approving_review_count: 1 + branch-53: + required_pull_request_reviews: + required_approving_review_count: 1 pull_requests: # enable updating head branches of pull requests allow_update_branch: true diff --git a/Cargo.lock b/Cargo.lock index 23670a7877041..610c13661b467 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,54 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "abi_stable" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d6512d3eb05ffe5004c59c206de7f99c34951504056ce23fc953842f12c445" -dependencies = [ - "abi_stable_derive", - "abi_stable_shared", - "const_panic", - "core_extensions", - "crossbeam-channel", - "generational-arena", - "libloading", - "lock_api", - "parking_lot", - "paste", - "repr_offset", - "rustc_version", - "serde", - "serde_derive", - "serde_json", -] - -[[package]] -name = "abi_stable_derive" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7178468b407a4ee10e881bc7a328a65e739f0863615cca4429d43916b05e898" -dependencies = [ - "abi_stable_shared", - "as_derive_utils", - "core_extensions", - "proc-macro2", - "quote", - "rustc_version", - "syn 1.0.109", - "typed-arena", -] - -[[package]] -name = "abi_stable_shared" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2b5df7688c123e63f4d4d649cba63f2967ba7f7861b1664fca3f77d3dad2b63" -dependencies = [ - "core_extensions", -] - [[package]] name = "adler2" version = "2.0.1" @@ -485,18 +437,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "as_derive_utils" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff3c96645900a44cf11941c111bd08a6573b0e2f9f69bc9264b179d8fae753c4" 
-dependencies = [ - "core_extensions", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "astral-tokio-tar" version = "0.5.6" @@ -530,9 +470,6 @@ name = "async-ffi" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4de21c0feef7e5a556e51af767c953f0501f7f300ba785cc99c47bdc8081a50" -dependencies = [ - "abi_stable", -] [[package]] name = "async-recursion" @@ -1470,15 +1407,6 @@ dependencies = [ "tiny-keccak", ] -[[package]] -name = "const_panic" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" -dependencies = [ - "typewit", -] - [[package]] name = "constant_time_eq" version = "0.4.2" @@ -1501,21 +1429,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "core_extensions" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42bb5e5d0269fd4f739ea6cedaf29c16d81c27a7ce7582008e90eb50dcd57003" -dependencies = [ - "core_extensions_proc_macros", -] - -[[package]] -name = "core_extensions_proc_macros" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533d38ecd2709b7608fb8e18e4504deb99e9a72879e6aa66373a76d8dc4259ea" - [[package]] name = "cpufeatures" version = "0.2.17" @@ -1571,15 +1484,6 @@ dependencies = [ "itertools 0.13.0", ] -[[package]] -name = "crossbeam-channel" -version = "0.5.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -1715,7 +1619,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-schema", 
@@ -1789,7 +1693,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "clap", @@ -1814,7 +1718,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -1837,7 +1741,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -1859,7 +1763,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -1890,7 +1794,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "apache-avro", @@ -1918,7 +1822,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.1.0" +version = "53.0.0" dependencies = [ "futures", "log", @@ -1927,7 +1831,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-compression", @@ -1962,7 +1866,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -1985,7 +1889,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "52.1.0" +version = "53.0.0" dependencies = [ "apache-avro", "arrow", @@ -2004,7 +1908,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2025,7 +1929,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2047,7 +1951,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2079,11 +1983,11 @@ 
dependencies = [ [[package]] name = "datafusion-doc" -version = "52.1.0" +version = "53.0.0" [[package]] name = "datafusion-examples" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2124,7 +2028,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2147,7 +2051,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2171,7 +2075,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2182,9 +2086,8 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "52.1.0" +version = "53.0.0" dependencies = [ - "abi_stable", "arrow", "arrow-schema", "async-ffi", @@ -2211,12 +2114,13 @@ dependencies = [ "log", "prost", "semver", + "stabby", "tokio", ] [[package]] name = "datafusion-functions" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2250,7 +2154,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "arrow", @@ -2272,7 +2176,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "arrow", @@ -2285,7 +2189,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2310,7 +2214,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2324,7 +2228,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "criterion", @@ -2341,7 +2245,7 @@ dependencies = [ [[package]] 
name = "datafusion-functions-window-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2349,7 +2253,7 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.1.0" +version = "53.0.0" dependencies = [ "datafusion-doc", "quote", @@ -2358,7 +2262,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2385,7 +2289,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "arrow", @@ -2412,7 +2316,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2425,7 +2329,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "arrow", @@ -2440,7 +2344,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2460,7 +2364,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "arrow", @@ -2497,7 +2401,7 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2535,7 +2439,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2547,7 +2451,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2565,7 +2469,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.1.0" +version = "53.0.0" dependencies = [ "async-trait", "datafusion-common", @@ 
-2577,7 +2481,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2603,7 +2507,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2629,7 +2533,7 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2660,7 +2564,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.1.0" +version = "53.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2681,7 +2585,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "52.1.0" +version = "53.0.0" dependencies = [ "bytes", "chrono", @@ -2954,7 +2858,6 @@ dependencies = [ name = "ffi_example_table_provider" version = "0.1.0" dependencies = [ - "abi_stable", "arrow", "datafusion", "datafusion-ffi", @@ -2965,7 +2868,6 @@ dependencies = [ name = "ffi_module_interface" version = "0.1.0" dependencies = [ - "abi_stable", "datafusion-ffi", ] @@ -2973,10 +2875,10 @@ dependencies = [ name = "ffi_module_loader" version = "0.1.0" dependencies = [ - "abi_stable", "datafusion", "datafusion-ffi", "ffi_module_interface", + "libloading", "tokio", ] @@ -3178,15 +3080,6 @@ dependencies = [ "prost-build", ] -[[package]] -name = "generational-arena" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877e94aff08e743b651baaea359664321055749b398adff8740a7399af7796e7" -dependencies = [ - "cfg-if", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -3911,12 +3804,12 @@ checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "libloading" -version = "0.7.4" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +checksum = 
"d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ "cfg-if", - "winapi", + "windows-link", ] [[package]] @@ -5128,15 +5021,6 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" -[[package]] -name = "repr_offset" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1070755bd29dffc19d0971cab794e607839ba2ef4b69a9e6fbc8733c1b72ea" -dependencies = [ - "tstr", -] - [[package]] name = "reqwest" version = "0.12.28" @@ -5621,6 +5505,12 @@ dependencies = [ "digest", ] +[[package]] +name = "sha2-const-stable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f179d4e11094a893b82fff208f74d448a7512f99f5a0acbd5c679b705f83ed9" + [[package]] name = "sharded-slab" version = "0.1.7" @@ -5763,6 +5653,41 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "stabby" +version = "72.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "976399a0c48ea769ef7f5dc303bb88240ab8d84008647a6b2303eced3dab3945" +dependencies = [ + "rustversion", + "stabby-abi", +] + +[[package]] +name = "stabby-abi" +version = "72.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7b54832a9a1f92a0e55e74a5c0332744426edc515bb3fbad82f10b874a87f0d" +dependencies = [ + "rustc_version", + "rustversion", + "sha2-const-stable", + "stabby-macros", +] + +[[package]] +name = "stabby-macros" +version = "72.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a768b1e51e4dbfa4fa52ae5c01241c0a41e2938fdffbb84add0c8238092f9091" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "rand 0.8.5", + "syn 1.0.109", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -6406,45 +6331,18 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "tstr" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f8e0294f14baae476d0dd0a2d780b2e24d66e349a9de876f5126777a37bdba7" -dependencies = [ - "tstr_proc_macros", -] - -[[package]] -name = "tstr_proc_macros" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" - [[package]] name = "twox-hash" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" -[[package]] -name = "typed-arena" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" - [[package]] name = "typenum" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" -[[package]] -name = "typewit" -version = "1.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" - [[package]] name = "typify" version = "0.5.0" diff --git a/Cargo.toml b/Cargo.toml index 3bcf17d8ed657..c09225ba6d839 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.88.0" # Define DataFusion version -version = "52.1.0" +version = "53.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -112,43 +112,43 @@ chrono = { version = "0.4.44", default-features = false } criterion = "0.8" ctor = "0.6.3" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "52.1.0", 
default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "52.1.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "52.1.0" } -datafusion-common = { path = "datafusion/common", version = "52.1.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "52.1.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "52.1.0", default-features = false } -datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "52.1.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "52.1.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "52.1.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "52.1.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "52.1.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "52.1.0" } -datafusion-execution = { path = "datafusion/execution", version = "52.1.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "52.1.0", default-features = false } -datafusion-expr-common = { path = "datafusion/expr-common", version = "52.1.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "52.1.0" } -datafusion-functions = { path = "datafusion/functions", version = "52.1.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "52.1.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "52.1.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "52.1.0", default-features = false } -datafusion-functions-table = { path = "datafusion/functions-table", version = "52.1.0" } 
-datafusion-functions-window = { path = "datafusion/functions-window", version = "52.1.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "52.1.0" } -datafusion-macros = { path = "datafusion/macros", version = "52.1.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "52.1.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "52.1.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "52.1.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "52.1.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "52.1.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "52.1.0" } -datafusion-proto = { path = "datafusion/proto", version = "52.1.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "52.1.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "52.1.0" } -datafusion-session = { path = "datafusion/session", version = "52.1.0" } -datafusion-spark = { path = "datafusion/spark", version = "52.1.0" } -datafusion-sql = { path = "datafusion/sql", version = "52.1.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "52.1.0" } +datafusion = { path = "datafusion/core", version = "53.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "53.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "53.0.0" } +datafusion-common = { path = "datafusion/common", version = "53.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "53.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "53.0.0", default-features = false } 
+datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "53.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "53.0.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "53.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "53.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "53.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "53.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "53.0.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "53.0.0", default-features = false } +datafusion-expr-common = { path = "datafusion/expr-common", version = "53.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "53.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "53.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "53.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "53.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "53.0.0", default-features = false } +datafusion-functions-table = { path = "datafusion/functions-table", version = "53.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "53.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "53.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "53.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "53.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "53.0.0", default-features = false } 
+datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "53.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "53.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "53.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "53.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "53.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "53.0.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "53.0.0" } +datafusion-session = { path = "datafusion/session", version = "53.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "53.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "53.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "53.0.0" } doc-comment = "0.3" env_logger = "0.11" diff --git a/datafusion-examples/examples/ffi/ffi_example_table_provider/Cargo.toml b/datafusion-examples/examples/ffi/ffi_example_table_provider/Cargo.toml index e2d0e3fa6744d..3cfa6dcf90f18 100644 --- a/datafusion-examples/examples/ffi/ffi_example_table_provider/Cargo.toml +++ b/datafusion-examples/examples/ffi/ffi_example_table_provider/Cargo.toml @@ -22,7 +22,6 @@ edition = { workspace = true } publish = false [dependencies] -abi_stable = "0.11.3" arrow = { workspace = true } datafusion = { workspace = true } datafusion-ffi = { workspace = true } diff --git a/datafusion-examples/examples/ffi/ffi_example_table_provider/src/lib.rs b/datafusion-examples/examples/ffi/ffi_example_table_provider/src/lib.rs index eb217ef9e4832..7894e97f3796d 100644 --- a/datafusion-examples/examples/ffi/ffi_example_table_provider/src/lib.rs +++ b/datafusion-examples/examples/ffi/ffi_example_table_provider/src/lib.rs @@ -17,13 +17,12 @@ use std::sync::Arc; -use abi_stable::{export_root_module, 
prefix_type::PrefixTypeTrait}; use arrow::array::RecordBatch; use arrow::datatypes::{DataType, Field, Schema}; use datafusion::{common::record_batch, datasource::MemTable}; use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use datafusion_ffi::table_provider::FFI_TableProvider; -use ffi_module_interface::{TableProviderModule, TableProviderModuleRef}; +use ffi_module_interface::TableProviderModule; fn create_record_batch(start_value: i32, num_values: usize) -> RecordBatch { let end_value = start_value + num_values as i32; @@ -56,11 +55,10 @@ extern "C" fn construct_simple_table_provider( FFI_TableProvider::new_with_ffi_codec(Arc::new(table_provider), true, None, codec) } -#[export_root_module] +#[unsafe(no_mangle)] /// This defines the entry point for using the module. -pub fn get_simple_memory_table() -> TableProviderModuleRef { +pub extern "C" fn ffi_example_get_module() -> TableProviderModule { TableProviderModule { create_table: construct_simple_table_provider, } - .leak_into_prefix() } diff --git a/datafusion-examples/examples/ffi/ffi_module_interface/Cargo.toml b/datafusion-examples/examples/ffi/ffi_module_interface/Cargo.toml index fe4902711241e..0244cb2a5ed15 100644 --- a/datafusion-examples/examples/ffi/ffi_module_interface/Cargo.toml +++ b/datafusion-examples/examples/ffi/ffi_module_interface/Cargo.toml @@ -25,5 +25,4 @@ publish = false workspace = true [dependencies] -abi_stable = "0.11.3" datafusion-ffi = { workspace = true } diff --git a/datafusion-examples/examples/ffi/ffi_module_interface/src/lib.rs b/datafusion-examples/examples/ffi/ffi_module_interface/src/lib.rs index 3b2b9e1871dae..54a59c9e5d073 100644 --- a/datafusion-examples/examples/ffi/ffi_module_interface/src/lib.rs +++ b/datafusion-examples/examples/ffi/ffi_module_interface/src/lib.rs @@ -15,36 +15,17 @@ // specific language governing permissions and limitations // under the License. 
-use abi_stable::{ - StableAbi, declare_root_module_statics, - library::{LibraryError, RootModule}, - package_version_strings, - sabi_types::VersionStrings, -}; use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use datafusion_ffi::table_provider::FFI_TableProvider; -#[repr(C)] -#[derive(StableAbi)] -#[sabi(kind(Prefix(prefix_ref = TableProviderModuleRef)))] /// This struct defines the module interfaces. It is to be shared by /// both the module loading program and library that implements the /// module. It is possible to move this definition into the loading /// program and reference it in the modules, but this example shows /// how a user may wish to separate these concerns. +#[repr(C)] pub struct TableProviderModule { /// Constructs the table provider pub create_table: extern "C" fn(codec: FFI_LogicalExtensionCodec) -> FFI_TableProvider, } - -impl RootModule for TableProviderModuleRef { - declare_root_module_statics! {TableProviderModuleRef} - const BASE_NAME: &'static str = "ffi_example_table_provider"; - const NAME: &'static str = "ffi_example_table_provider"; - const VERSION_STRINGS: VersionStrings = package_version_strings!(); - - fn initialization(self) -> Result { - Ok(self) - } -} diff --git a/datafusion-examples/examples/ffi/ffi_module_loader/Cargo.toml b/datafusion-examples/examples/ffi/ffi_module_loader/Cargo.toml index 8d7434dca211b..48821a9310769 100644 --- a/datafusion-examples/examples/ffi/ffi_module_loader/Cargo.toml +++ b/datafusion-examples/examples/ffi/ffi_module_loader/Cargo.toml @@ -25,8 +25,8 @@ publish = false workspace = true [dependencies] -abi_stable = "0.11.3" datafusion = { workspace = true } datafusion-ffi = { workspace = true } ffi_module_interface = { path = "../ffi_module_interface" } +libloading = "0.8" tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } diff --git a/datafusion-examples/examples/ffi/ffi_module_loader/src/main.rs 
b/datafusion-examples/examples/ffi/ffi_module_loader/src/main.rs index 8ce5b156df3b1..3a4be813da5d4 100644 --- a/datafusion-examples/examples/ffi/ffi_module_loader/src/main.rs +++ b/datafusion-examples/examples/ffi/ffi_module_loader/src/main.rs @@ -18,28 +18,47 @@ use std::sync::Arc; use datafusion::{ + datasource::TableProvider, error::{DataFusionError, Result}, + execution::TaskContextProvider, prelude::SessionContext, }; - -use abi_stable::library::{RootModule, development_utils::compute_library_path}; -use datafusion::datasource::TableProvider; -use datafusion::execution::TaskContextProvider; use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; -use ffi_module_interface::TableProviderModuleRef; +use ffi_module_interface::TableProviderModule; #[tokio::main] async fn main() -> Result<()> { // Find the location of the library. This is specific to the build environment, // so you will need to change the approach here based on your use case. - let target: &std::path::Path = "../../../../target/".as_ref(); - let library_path = compute_library_path::(target) - .map_err(|e| DataFusionError::External(Box::new(e)))?; + let lib_prefix = if cfg!(target_os = "windows") { + "" + } else { + "lib" + }; + let lib_ext = if cfg!(target_os = "macos") { + "dylib" + } else if cfg!(target_os = "windows") { + "dll" + } else { + "so" + }; + + let library_path = format!( + "../../../../target/debug/{lib_prefix}ffi_example_table_provider.{lib_ext}" + ); + + // Load the library using libloading + let lib = unsafe { + libloading::Library::new(&library_path) + .map_err(|e| DataFusionError::External(Box::new(e)))? + }; + + let get_module: libloading::Symbol TableProviderModule> = unsafe { + lib.get(b"ffi_example_get_module") + .map_err(|e| DataFusionError::External(Box::new(e)))? 
+ }; - // Load the module - let table_provider_module = - TableProviderModuleRef::load_from_directory(&library_path) - .map_err(|e| DataFusionError::External(Box::new(e)))?; + let table_provider_module = get_module(); let ctx = Arc::new(SessionContext::new()); let codec = FFI_LogicalExtensionCodec::new_default( @@ -48,12 +67,7 @@ async fn main() -> Result<()> { // By calling the code below, the table provided will be created within // the module's code. - let ffi_table_provider = - table_provider_module - .create_table() - .ok_or(DataFusionError::NotImplemented( - "External table provider failed to implement create_table".to_string(), - ))?(codec); + let ffi_table_provider = (table_provider_module.create_table)(codec); // In order to access the table provider within this executable, we need to // turn it into a `TableProvider`. diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 3d0a76a182697..8965948a0f4e2 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -88,8 +88,8 @@ recursive_protection = [ "datafusion-optimizer/recursive_protection", "datafusion-physical-optimizer/recursive_protection", "datafusion-physical-expr/recursive_protection", - "datafusion-sql/recursive_protection", - "sqlparser/recursive-protection", + "datafusion-sql?/recursive_protection", + "sqlparser?/recursive-protection", ] serde = [ "dep:serde", diff --git a/datafusion/expr-common/src/operator.rs b/datafusion/expr-common/src/operator.rs index 33512b0c354d6..427069b326f9d 100644 --- a/datafusion/expr-common/src/operator.rs +++ b/datafusion/expr-common/src/operator.rs @@ -140,6 +140,10 @@ pub enum Operator { /// /// Not implemented in DataFusion yet. QuestionPipe, + /// Colon operator, like `:` + /// + /// Not implemented in DataFusion yet. 
+ Colon, } impl Operator { @@ -188,7 +192,8 @@ impl Operator { | Operator::AtQuestion | Operator::Question | Operator::QuestionAnd - | Operator::QuestionPipe => None, + | Operator::QuestionPipe + | Operator::Colon => None, } } @@ -283,7 +288,8 @@ impl Operator { | Operator::AtQuestion | Operator::Question | Operator::QuestionAnd - | Operator::QuestionPipe => None, + | Operator::QuestionPipe + | Operator::Colon => None, } } @@ -323,7 +329,8 @@ impl Operator { | Operator::AtQuestion | Operator::Question | Operator::QuestionAnd - | Operator::QuestionPipe => 30, + | Operator::QuestionPipe + | Operator::Colon => 30, Operator::Plus | Operator::Minus => 40, Operator::Multiply | Operator::Divide | Operator::Modulo => 45, } @@ -369,7 +376,8 @@ impl Operator { | Operator::AtQuestion | Operator::Question | Operator::QuestionAnd - | Operator::QuestionPipe => true, + | Operator::QuestionPipe + | Operator::Colon => true, // E.g. `TRUE OR NULL` is `TRUE` Operator::Or @@ -429,6 +437,7 @@ impl fmt::Display for Operator { Operator::Question => "?", Operator::QuestionAnd => "?&", Operator::QuestionPipe => "?|", + Operator::Colon => ":", }; write!(f, "{display}") } diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index c6ac86cd396c4..e696545ea6caf 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -324,6 +324,9 @@ impl<'a> BinaryTypeCoercer<'a> { ) } }, + Colon => { + Ok(Signature { lhs: lhs.clone(), rhs: rhs.clone(), ret: lhs.clone() }) + }, IntegerDivide | Arrow | LongArrow | HashArrow | HashLongArrow | HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe => { not_impl_err!("Operator {} is not yet supported", self.op) diff --git a/datafusion/ffi/Cargo.toml b/datafusion/ffi/Cargo.toml index 28e1b2ee5681f..60d8b51d0179e 100644 --- a/datafusion/ffi/Cargo.toml +++ b/datafusion/ffi/Cargo.toml @@ -44,10 +44,9 @@ crate-type = ["cdylib", 
"rlib"] # It increases build times and library binary size for users. [dependencies] -abi_stable = "0.11.3" arrow = { workspace = true, features = ["ffi"] } arrow-schema = { workspace = true } -async-ffi = { version = "0.5.0", features = ["abi_stable"] } +async-ffi = { version = "0.5.0" } async-trait = { workspace = true } datafusion-catalog = { workspace = true } datafusion-common = { workspace = true } @@ -69,6 +68,7 @@ futures = { workspace = true } log = { workspace = true } prost = { workspace = true } semver = "1.0.27" +stabby = "72.1.1" tokio = { workspace = true } [dev-dependencies] diff --git a/datafusion/ffi/src/arrow_wrappers.rs b/datafusion/ffi/src/arrow_wrappers.rs index c83e412310e7f..1c921b0f83b1e 100644 --- a/datafusion/ffi/src/arrow_wrappers.rs +++ b/datafusion/ffi/src/arrow_wrappers.rs @@ -17,7 +17,6 @@ use std::sync::Arc; -use abi_stable::StableAbi; use arrow::array::{ArrayRef, make_array}; use arrow::datatypes::{Schema, SchemaRef}; use arrow::error::ArrowError; @@ -26,10 +25,10 @@ use datafusion_common::{DataFusionError, ScalarValue}; use log::error; /// This is a wrapper struct around FFI_ArrowSchema simply to indicate -/// to the StableAbi macros that the underlying struct is FFI safe. +/// that the underlying struct is FFI safe. #[repr(C)] -#[derive(Debug, StableAbi)] -pub struct WrappedSchema(#[sabi(unsafe_opaque_field)] pub FFI_ArrowSchema); +#[derive(Debug)] +pub struct WrappedSchema(pub FFI_ArrowSchema); impl From for WrappedSchema { fn from(value: SchemaRef) -> Self { @@ -66,15 +65,13 @@ impl From for SchemaRef { } } -/// This is a wrapper struct for FFI_ArrowArray to indicate to StableAbi +/// This is a wrapper struct for FFI_ArrowArray to indicate /// that the struct is FFI Safe. For convenience, we also include the /// schema needed to create a record batch from the array. 
#[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct WrappedArray { - #[sabi(unsafe_opaque_field)] pub array: FFI_ArrowArray, - pub schema: WrappedSchema, } diff --git a/datafusion/ffi/src/catalog_provider.rs b/datafusion/ffi/src/catalog_provider.rs index 61e26f1663532..60346f1ab6042 100644 --- a/datafusion/ffi/src/catalog_provider.rs +++ b/datafusion/ffi/src/catalog_provider.rs @@ -19,45 +19,45 @@ use std::any::Any; use std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RResult, RString, RVec}; use datafusion_catalog::{CatalogProvider, SchemaProvider}; use datafusion_common::error::Result; use datafusion_proto::logical_plan::{ DefaultLogicalExtensionCodec, LogicalExtensionCodec, }; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; use tokio::runtime::Handle; use crate::execution::FFI_TaskContextProvider; use crate::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use crate::schema_provider::{FFI_SchemaProvider, ForeignSchemaProvider}; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiOption, FfiResult}; use crate::{df_result, rresult_return}; /// A stable struct for sharing [`CatalogProvider`] across FFI boundaries. 
#[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_CatalogProvider { - pub schema_names: unsafe extern "C" fn(provider: &Self) -> RVec, + pub schema_names: unsafe extern "C" fn(provider: &Self) -> StabbyVec, pub schema: unsafe extern "C" fn( provider: &Self, - name: RString, - ) -> ROption, + name: StabbyString, + ) -> FfiOption, pub register_schema: unsafe extern "C" fn( provider: &Self, - name: RString, + name: StabbyString, schema: &FFI_SchemaProvider, ) - -> FFIResult>, + -> FFIResult>, - pub deregister_schema: unsafe extern "C" fn( - provider: &Self, - name: RString, - cascade: bool, - ) - -> FFIResult>, + pub deregister_schema: + unsafe extern "C" fn( + provider: &Self, + name: StabbyString, + cascade: bool, + ) -> FFIResult>, pub logical_codec: FFI_LogicalExtensionCodec, @@ -107,7 +107,7 @@ impl FFI_CatalogProvider { unsafe extern "C" fn schema_names_fn_wrapper( provider: &FFI_CatalogProvider, -) -> RVec { +) -> StabbyVec { unsafe { let names = provider.inner().schema_names(); names.into_iter().map(|s| s.into()).collect() @@ -116,8 +116,8 @@ unsafe extern "C" fn schema_names_fn_wrapper( unsafe extern "C" fn schema_fn_wrapper( provider: &FFI_CatalogProvider, - name: RString, -) -> ROption { + name: StabbyString, +) -> FfiOption { unsafe { let maybe_schema = provider.inner().schema(name.as_str()); maybe_schema @@ -134,9 +134,9 @@ unsafe extern "C" fn schema_fn_wrapper( unsafe extern "C" fn register_schema_fn_wrapper( provider: &FFI_CatalogProvider, - name: RString, + name: StabbyString, schema: &FFI_SchemaProvider, -) -> FFIResult> { +) -> FFIResult> { unsafe { let runtime = provider.runtime(); let inner_provider = provider.inner(); @@ -153,15 +153,15 @@ unsafe extern "C" fn register_schema_fn_wrapper( }) .into(); - RResult::ROk(returned_schema) + FfiResult::Ok(returned_schema) } } unsafe extern "C" fn deregister_schema_fn_wrapper( provider: &FFI_CatalogProvider, - name: RString, + name: StabbyString, cascade: bool, -) -> FFIResult> { +) 
-> FFIResult> { unsafe { let runtime = provider.runtime(); let inner_provider = provider.inner(); @@ -169,7 +169,7 @@ unsafe extern "C" fn deregister_schema_fn_wrapper( let maybe_schema = rresult_return!(inner_provider.deregister_schema(name.as_str(), cascade)); - RResult::ROk( + FfiResult::Ok( maybe_schema .map(|schema| { FFI_SchemaProvider::new_with_ffi_codec( diff --git a/datafusion/ffi/src/catalog_provider_list.rs b/datafusion/ffi/src/catalog_provider_list.rs index 40f8be3871bb9..355f46eed00fe 100644 --- a/datafusion/ffi/src/catalog_provider_list.rs +++ b/datafusion/ffi/src/catalog_provider_list.rs @@ -19,35 +19,36 @@ use std::any::Any; use std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RString, RVec}; use datafusion_catalog::{CatalogProvider, CatalogProviderList}; use datafusion_proto::logical_plan::{ DefaultLogicalExtensionCodec, LogicalExtensionCodec, }; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; use tokio::runtime::Handle; use crate::catalog_provider::{FFI_CatalogProvider, ForeignCatalogProvider}; use crate::execution::FFI_TaskContextProvider; use crate::proto::logical_extension_codec::FFI_LogicalExtensionCodec; +use crate::util::FfiOption; /// A stable struct for sharing [`CatalogProviderList`] across FFI boundaries. 
#[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_CatalogProviderList { /// Register a catalog pub register_catalog: unsafe extern "C" fn( &Self, - name: RString, + name: StabbyString, catalog: &FFI_CatalogProvider, - ) -> ROption, + ) -> FfiOption, /// List of existing catalogs - pub catalog_names: unsafe extern "C" fn(&Self) -> RVec, + pub catalog_names: unsafe extern "C" fn(&Self) -> StabbyVec, /// Access a catalog pub catalog: - unsafe extern "C" fn(&Self, name: RString) -> ROption, + unsafe extern "C" fn(&Self, name: StabbyString) -> FfiOption, pub logical_codec: FFI_LogicalExtensionCodec, @@ -97,7 +98,7 @@ impl FFI_CatalogProviderList { unsafe extern "C" fn catalog_names_fn_wrapper( provider: &FFI_CatalogProviderList, -) -> RVec { +) -> StabbyVec { unsafe { let names = provider.inner().catalog_names(); names.into_iter().map(|s| s.into()).collect() @@ -106,9 +107,9 @@ unsafe extern "C" fn catalog_names_fn_wrapper( unsafe extern "C" fn register_catalog_fn_wrapper( provider: &FFI_CatalogProviderList, - name: RString, + name: StabbyString, catalog: &FFI_CatalogProvider, -) -> ROption { +) -> FfiOption { unsafe { let runtime = provider.runtime(); let inner_provider = provider.inner(); @@ -129,8 +130,8 @@ unsafe extern "C" fn register_catalog_fn_wrapper( unsafe extern "C" fn catalog_fn_wrapper( provider: &FFI_CatalogProviderList, - name: RString, -) -> ROption { + name: StabbyString, +) -> FfiOption { unsafe { let runtime = provider.runtime(); let inner_provider = provider.inner(); diff --git a/datafusion/ffi/src/config/extension_options.rs b/datafusion/ffi/src/config/extension_options.rs index 48fd4e710921a..f9ce7a2cc9eee 100644 --- a/datafusion/ffi/src/config/extension_options.rs +++ b/datafusion/ffi/src/config/extension_options.rs @@ -19,12 +19,15 @@ use std::any::Any; use std::collections::HashMap; use std::ffi::c_void; -use abi_stable::StableAbi; -use abi_stable::std_types::{RResult, RStr, RString, RVec, Tuple2}; use 
datafusion_common::config::{ConfigEntry, ConfigExtension, ExtensionOptions}; use datafusion_common::{Result, exec_err}; +use stabby::str::Str as StabbyStr; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; + use crate::df_result; +use crate::util::{FFIResult, FfiResult}; /// A stable struct for sharing [`ExtensionOptions`] across FFI boundaries. /// @@ -38,17 +41,20 @@ use crate::df_result; /// are stored with the full path prefix to avoid overwriting values when using /// multiple extensions. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_ExtensionOptions { /// Return a deep clone of this [`ExtensionOptions`] pub cloned: unsafe extern "C" fn(&Self) -> FFI_ExtensionOptions, /// Set the given `key`, `value` pair - pub set: - unsafe extern "C" fn(&mut Self, key: RStr, value: RStr) -> RResult<(), RString>, + pub set: unsafe extern "C" fn( + &mut Self, + key: StabbyStr, + value: StabbyStr, + ) -> FFIResult<()>, /// Returns the [`ConfigEntry`] stored in this [`ExtensionOptions`] - pub entries: unsafe extern "C" fn(&Self) -> RVec>, + pub entries: unsafe extern "C" fn(&Self) -> StabbyVec<(StabbyString, StabbyString)>, /// Release the memory of the private data when it is no longer being used. 
pub release: unsafe extern "C" fn(&mut Self), @@ -91,20 +97,22 @@ unsafe extern "C" fn cloned_fn_wrapper( unsafe extern "C" fn set_fn_wrapper( options: &mut FFI_ExtensionOptions, - key: RStr, - value: RStr, -) -> RResult<(), RString> { - let _ = options.inner_mut().insert(key.into(), value.into()); - RResult::ROk(()) + key: StabbyStr, + value: StabbyStr, +) -> FFIResult<()> { + let _ = options + .inner_mut() + .insert(key.as_str().into(), value.as_str().into()); + FfiResult::Ok(()) } unsafe extern "C" fn entries_fn_wrapper( options: &FFI_ExtensionOptions, -) -> RVec> { +) -> StabbyVec<(StabbyString, StabbyString)> { options .inner() .iter() - .map(|(key, value)| (key.to_owned().into(), value.to_owned().into()).into()) + .map(|(key, value)| (key.to_owned().into(), value.to_owned().into())) .collect() } diff --git a/datafusion/ffi/src/config/mod.rs b/datafusion/ffi/src/config/mod.rs index 850a4dc337336..e08770f370bb3 100644 --- a/datafusion/ffi/src/config/mod.rs +++ b/datafusion/ffi/src/config/mod.rs @@ -17,12 +17,12 @@ pub mod extension_options; -use abi_stable::StableAbi; -use abi_stable::std_types::{RHashMap, RString}; use datafusion_common::config::{ ConfigExtension, ConfigOptions, ExtensionOptions, TableOptions, }; use datafusion_common::{DataFusionError, Result}; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; use crate::config::extension_options::FFI_ExtensionOptions; @@ -32,16 +32,16 @@ use crate::config::extension_options::FFI_ExtensionOptions; /// than local extensions. The trait [`ExtensionOptionsFFIProvider`] can /// be used to simplify accessing FFI extensions. 
#[repr(C)] -#[derive(Debug, Clone, StableAbi)] +#[derive(Debug, Clone)] pub struct FFI_ConfigOptions { - base_options: RHashMap, + base_options: StabbyVec<(StabbyString, StabbyString)>, extensions: FFI_ExtensionOptions, } impl From<&ConfigOptions> for FFI_ConfigOptions { fn from(options: &ConfigOptions) -> Self { - let base_options: RHashMap = options + let base_options: StabbyVec<(StabbyString, StabbyString)> = options .entries() .into_iter() .filter_map(|entry| entry.value.map(|value| (entry.key, value))) @@ -120,16 +120,16 @@ impl ExtensionOptionsFFIProvider for TableOptions { /// than local extensions. The trait [`ExtensionOptionsFFIProvider`] can /// be used to simplify accessing FFI extensions. #[repr(C)] -#[derive(Debug, Clone, StableAbi)] +#[derive(Debug, Clone)] pub struct FFI_TableOptions { - base_options: RHashMap, + base_options: StabbyVec<(StabbyString, StabbyString)>, extensions: FFI_ExtensionOptions, } impl From<&TableOptions> for FFI_TableOptions { fn from(options: &TableOptions) -> Self { - let base_options: RHashMap = options + let base_options: StabbyVec<(StabbyString, StabbyString)> = options .entries() .into_iter() .filter_map(|entry| entry.value.map(|value| (entry.key, value))) diff --git a/datafusion/ffi/src/execution/task_ctx.rs b/datafusion/ffi/src/execution/task_ctx.rs index e0598db0a0170..bd9053dd17283 100644 --- a/datafusion/ffi/src/execution/task_ctx.rs +++ b/datafusion/ffi/src/execution/task_ctx.rs @@ -18,9 +18,6 @@ use std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::pmr::ROption; -use abi_stable::std_types::{RHashMap, RString}; use datafusion_execution::TaskContext; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeEnv; @@ -28,33 +25,39 @@ use datafusion_expr::{ AggregateUDF, AggregateUDFImpl, ScalarUDF, ScalarUDFImpl, WindowUDF, WindowUDFImpl, }; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; + use 
crate::session::config::FFI_SessionConfig; use crate::udaf::FFI_AggregateUDF; use crate::udf::FFI_ScalarUDF; use crate::udwf::FFI_WindowUDF; +use crate::util::FfiOption; /// A stable struct for sharing [`TaskContext`] across FFI boundaries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_TaskContext { /// Return the session ID. - pub session_id: unsafe extern "C" fn(&Self) -> RString, + pub session_id: unsafe extern "C" fn(&Self) -> StabbyString, /// Return the task ID. - pub task_id: unsafe extern "C" fn(&Self) -> ROption, + pub task_id: unsafe extern "C" fn(&Self) -> FfiOption, /// Return the session configuration. pub session_config: unsafe extern "C" fn(&Self) -> FFI_SessionConfig, - /// Returns a hashmap of names to scalar functions. - pub scalar_functions: unsafe extern "C" fn(&Self) -> RHashMap, + /// Returns a vec of name-function pairs for scalar functions. + pub scalar_functions: + unsafe extern "C" fn(&Self) -> StabbyVec<(StabbyString, FFI_ScalarUDF)>, - /// Returns a hashmap of names to aggregate functions. + /// Returns a vec of name-function pairs for aggregate functions. pub aggregate_functions: - unsafe extern "C" fn(&Self) -> RHashMap, + unsafe extern "C" fn(&Self) -> StabbyVec<(StabbyString, FFI_AggregateUDF)>, - /// Returns a hashmap of names to window functions. - pub window_functions: unsafe extern "C" fn(&Self) -> RHashMap, + /// Returns a vec of name-function pairs for window functions. + pub window_functions: + unsafe extern "C" fn(&Self) -> StabbyVec<(StabbyString, FFI_WindowUDF)>, /// Release the memory of the private data when it is no longer being used. 
pub release: unsafe extern "C" fn(arg: &mut Self), @@ -82,14 +85,16 @@ impl FFI_TaskContext { } } -unsafe extern "C" fn session_id_fn_wrapper(ctx: &FFI_TaskContext) -> RString { +unsafe extern "C" fn session_id_fn_wrapper(ctx: &FFI_TaskContext) -> StabbyString { unsafe { let ctx = ctx.inner(); ctx.session_id().into() } } -unsafe extern "C" fn task_id_fn_wrapper(ctx: &FFI_TaskContext) -> ROption { +unsafe extern "C" fn task_id_fn_wrapper( + ctx: &FFI_TaskContext, +) -> FfiOption { unsafe { let ctx = ctx.inner(); ctx.task_id().map(|s| s.as_str().into()).into() @@ -107,7 +112,7 @@ unsafe extern "C" fn session_config_fn_wrapper( unsafe extern "C" fn scalar_functions_fn_wrapper( ctx: &FFI_TaskContext, -) -> RHashMap { +) -> StabbyVec<(StabbyString, FFI_ScalarUDF)> { unsafe { let ctx = ctx.inner(); ctx.scalar_functions() @@ -119,7 +124,7 @@ unsafe extern "C" fn scalar_functions_fn_wrapper( unsafe extern "C" fn aggregate_functions_fn_wrapper( ctx: &FFI_TaskContext, -) -> RHashMap { +) -> StabbyVec<(StabbyString, FFI_AggregateUDF)> { unsafe { let ctx = ctx.inner(); ctx.aggregate_functions() @@ -136,7 +141,7 @@ unsafe extern "C" fn aggregate_functions_fn_wrapper( unsafe extern "C" fn window_functions_fn_wrapper( ctx: &FFI_TaskContext, -) -> RHashMap { +) -> StabbyVec<(StabbyString, FFI_WindowUDF)> { unsafe { let ctx = ctx.inner(); ctx.window_functions() @@ -198,7 +203,7 @@ impl From for Arc { let udf = >::from(&kv_pair.1); ( - kv_pair.0.into_string(), + kv_pair.0.to_string(), Arc::new(ScalarUDF::new_from_shared_impl(udf)), ) }) @@ -209,7 +214,7 @@ impl From for Arc { let udaf = >::from(&kv_pair.1); ( - kv_pair.0.into_string(), + kv_pair.0.to_string(), Arc::new(AggregateUDF::new_from_shared_impl(udaf)), ) }) @@ -220,7 +225,7 @@ impl From for Arc { let udwf = >::from(&kv_pair.1); ( - kv_pair.0.into_string(), + kv_pair.0.to_string(), Arc::new(WindowUDF::new_from_shared_impl(udwf)), ) }) diff --git a/datafusion/ffi/src/execution/task_ctx_provider.rs 
b/datafusion/ffi/src/execution/task_ctx_provider.rs index 5d4eaac83975a..6ab010f4fb97e 100644 --- a/datafusion/ffi/src/execution/task_ctx_provider.rs +++ b/datafusion/ffi/src/execution/task_ctx_provider.rs @@ -18,7 +18,6 @@ use std::ffi::c_void; use std::sync::{Arc, Weak}; -use abi_stable::StableAbi; use datafusion_common::{DataFusionError, ffi_datafusion_err}; use datafusion_execution::{TaskContext, TaskContextProvider}; @@ -32,7 +31,7 @@ use crate::{df_result, rresult}; /// data passed across the FFI boundary. See the crate README for /// additional information. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_TaskContextProvider { /// Retrieve the current [`TaskContext`] provided the provider has not /// gone out of scope. This function will return an error if the weakly diff --git a/datafusion/ffi/src/execution_plan.rs b/datafusion/ffi/src/execution_plan.rs index 524d8b4b6b976..ea804419aa3ec 100644 --- a/datafusion/ffi/src/execution_plan.rs +++ b/datafusion/ffi/src/execution_plan.rs @@ -19,13 +19,14 @@ use std::ffi::c_void; use std::pin::Pin; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{RString, RVec}; +use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_common::{DataFusionError, Result}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, }; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; use tokio::runtime::Handle; use crate::execution::FFI_TaskContext; @@ -36,16 +37,16 @@ use crate::{df_result, rresult}; /// A stable struct for sharing a [`ExecutionPlan`] across FFI boundaries. 
#[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_ExecutionPlan { /// Return the plan properties pub properties: unsafe extern "C" fn(plan: &Self) -> FFI_PlanProperties, /// Return a vector of children plans - pub children: unsafe extern "C" fn(plan: &Self) -> RVec, + pub children: unsafe extern "C" fn(plan: &Self) -> StabbyVec, /// Return the plan name. - pub name: unsafe extern "C" fn(plan: &Self) -> RString, + pub name: unsafe extern "C" fn(plan: &Self) -> StabbyString, /// Execute the plan and return a record batch stream. Errors /// will be returned as a string. @@ -95,19 +96,16 @@ unsafe extern "C" fn properties_fn_wrapper( unsafe extern "C" fn children_fn_wrapper( plan: &FFI_ExecutionPlan, -) -> RVec { +) -> StabbyVec { unsafe { let private_data = plan.private_data as *const ExecutionPlanPrivateData; let plan = &(*private_data).plan; let runtime = &(*private_data).runtime; - let children: Vec<_> = plan - .children() + plan.children() .into_iter() .map(|child| FFI_ExecutionPlan::new(Arc::clone(child), runtime.clone())) - .collect(); - - children.into() + .collect() } } @@ -129,7 +127,7 @@ unsafe extern "C" fn execute_fn_wrapper( } } -unsafe extern "C" fn name_fn_wrapper(plan: &FFI_ExecutionPlan) -> RString { +unsafe extern "C" fn name_fn_wrapper(plan: &FFI_ExecutionPlan) -> StabbyString { plan.inner().name().into() } diff --git a/datafusion/ffi/src/expr/columnar_value.rs b/datafusion/ffi/src/expr/columnar_value.rs index 7ad7645ecb6cf..19ad9ff7a3b79 100644 --- a/datafusion/ffi/src/expr/columnar_value.rs +++ b/datafusion/ffi/src/expr/columnar_value.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use abi_stable::StableAbi; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::ColumnarValue; @@ -23,8 +22,8 @@ use crate::arrow_wrappers::WrappedArray; /// A stable struct for sharing [`ColumnarValue`] across FFI boundaries. 
/// Scalar values are passed as an Arrow array of length 1. -#[repr(C)] -#[derive(Debug, StableAbi)] +#[repr(C, u8)] +#[derive(Debug)] pub enum FFI_ColumnarValue { Array(WrappedArray), Scalar(WrappedArray), diff --git a/datafusion/ffi/src/expr/distribution.rs b/datafusion/ffi/src/expr/distribution.rs index b9ebfc2362c7a..ca760f16ad17c 100644 --- a/datafusion/ffi/src/expr/distribution.rs +++ b/datafusion/ffi/src/expr/distribution.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use abi_stable::StableAbi; use datafusion_common::DataFusionError; use datafusion_expr::statistics::{ BernoulliDistribution, Distribution, ExponentialDistribution, GaussianDistribution, @@ -28,7 +27,7 @@ use crate::expr::interval::FFI_Interval; /// A stable struct for sharing [`Distribution`] across FFI boundaries. /// See ['Distribution'] for the meaning of each variant. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] #[expect(clippy::large_enum_variant)] pub enum FFI_Distribution { Uniform(FFI_UniformDistribution), @@ -67,13 +66,13 @@ impl TryFrom for Distribution { } #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_UniformDistribution { interval: FFI_Interval, } #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_ExponentialDistribution { rate: WrappedArray, offset: WrappedArray, @@ -81,20 +80,20 @@ pub struct FFI_ExponentialDistribution { } #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_GaussianDistribution { mean: WrappedArray, variance: WrappedArray, } #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_BernoulliDistribution { p: WrappedArray, } #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_GenericDistribution { mean: WrappedArray, median: WrappedArray, diff --git a/datafusion/ffi/src/expr/expr_properties.rs b/datafusion/ffi/src/expr/expr_properties.rs index 199a399a6471f..5b37cc6a28535 100644 --- 
a/datafusion/ffi/src/expr/expr_properties.rs +++ b/datafusion/ffi/src/expr/expr_properties.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use abi_stable::StableAbi; use arrow_schema::SortOptions; use datafusion_common::DataFusionError; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; @@ -25,7 +24,7 @@ use crate::expr::interval::FFI_Interval; /// A stable struct for sharing [`ExprProperties`] across FFI boundaries. /// See [`ExprProperties`] for the meaning of each field. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_ExprProperties { sort_properties: FFI_SortProperties, range: FFI_Interval, @@ -60,7 +59,7 @@ impl TryFrom for ExprProperties { } #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub enum FFI_SortProperties { Ordered(FFI_SortOptions), Unordered, @@ -88,7 +87,7 @@ impl From<&FFI_SortProperties> for SortProperties { } #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_SortOptions { pub descending: bool, pub nulls_first: bool, diff --git a/datafusion/ffi/src/expr/interval.rs b/datafusion/ffi/src/expr/interval.rs index 450f3747a57f0..6334f7bb24d90 100644 --- a/datafusion/ffi/src/expr/interval.rs +++ b/datafusion/ffi/src/expr/interval.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use abi_stable::StableAbi; use datafusion_common::DataFusionError; use datafusion_expr::interval_arithmetic::Interval; @@ -25,7 +24,7 @@ use crate::arrow_wrappers::WrappedArray; /// See [`Interval`] for the meaning of each field. Scalar values /// are passed as Arrow arrays of length 1. 
#[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_Interval { lower: WrappedArray, upper: WrappedArray, diff --git a/datafusion/ffi/src/ffi_option.rs b/datafusion/ffi/src/ffi_option.rs new file mode 100644 index 0000000000000..e6f361a8de9e4 --- /dev/null +++ b/datafusion/ffi/src/ffi_option.rs @@ -0,0 +1,133 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! FFI-safe Option and Result types that do not require `IStable` bounds. +//! +//! stabby's `Option` and `Result` require `T: IStable` for niche +//! optimization. Many of our FFI structs contain self-referential function +//! pointers and cannot implement `IStable`. These simple `#[repr(C)]` types +//! provide the same FFI-safe semantics without that constraint. + +/// An FFI-safe option type. 
+#[repr(C, u8)] +#[derive(Debug, Clone)] +pub enum FfiOption { + Some(T), + None, +} + +impl From> for FfiOption { + fn from(opt: Option) -> Self { + match opt { + Some(v) => FfiOption::Some(v), + None => FfiOption::None, + } + } +} + +impl From> for Option { + fn from(opt: FfiOption) -> Self { + match opt { + FfiOption::Some(v) => Some(v), + FfiOption::None => None, + } + } +} + +impl FfiOption { + pub fn as_ref(&self) -> Option<&T> { + match self { + FfiOption::Some(v) => Some(v), + FfiOption::None => None, + } + } + + pub fn map U>(self, f: F) -> FfiOption { + match self { + FfiOption::Some(v) => FfiOption::Some(f(v)), + FfiOption::None => FfiOption::None, + } + } + + pub fn into_option(self) -> Option { + self.into() + } +} + +/// An FFI-safe result type. +#[repr(C, u8)] +#[derive(Debug, Clone)] +pub enum FfiResult { + Ok(T), + Err(E), +} + +impl From> for FfiResult { + fn from(res: Result) -> Self { + match res { + Ok(v) => FfiResult::Ok(v), + Err(e) => FfiResult::Err(e), + } + } +} + +impl From> for Result { + fn from(res: FfiResult) -> Self { + match res { + FfiResult::Ok(v) => Ok(v), + FfiResult::Err(e) => Err(e), + } + } +} + +impl FfiResult { + pub fn is_ok(&self) -> bool { + matches!(self, FfiResult::Ok(_)) + } + + pub fn is_err(&self) -> bool { + matches!(self, FfiResult::Err(_)) + } + + pub fn unwrap_err(self) -> E { + match self { + FfiResult::Err(e) => e, + FfiResult::Ok(_) => panic!("called unwrap_err on Ok"), + } + } + + pub fn map U>(self, f: F) -> FfiResult { + match self { + FfiResult::Ok(v) => FfiResult::Ok(f(v)), + FfiResult::Err(e) => FfiResult::Err(e), + } + } + + pub fn into_result(self) -> Result { + self.into() + } +} + +impl PartialEq for FfiResult { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (FfiResult::Ok(a), FfiResult::Ok(b)) => a == b, + (FfiResult::Err(a), FfiResult::Err(b)) => a == b, + _ => false, + } + } +} diff --git a/datafusion/ffi/src/insert_op.rs b/datafusion/ffi/src/insert_op.rs index 
6471039105e80..b6b15e339dca2 100644 --- a/datafusion/ffi/src/insert_op.rs +++ b/datafusion/ffi/src/insert_op.rs @@ -15,34 +15,32 @@ // specific language governing permissions and limitations // under the License. -use abi_stable::StableAbi; use datafusion_expr::logical_plan::dml::InsertOp; /// FFI safe version of [`InsertOp`]. -#[repr(C)] -#[derive(StableAbi)] -pub enum FFI_InsertOp { +#[repr(u8)] +pub enum FFiInsertOp { Append, Overwrite, Replace, } -impl From for InsertOp { - fn from(value: FFI_InsertOp) -> Self { +impl From for InsertOp { + fn from(value: FFiInsertOp) -> Self { match value { - FFI_InsertOp::Append => InsertOp::Append, - FFI_InsertOp::Overwrite => InsertOp::Overwrite, - FFI_InsertOp::Replace => InsertOp::Replace, + FFiInsertOp::Append => InsertOp::Append, + FFiInsertOp::Overwrite => InsertOp::Overwrite, + FFiInsertOp::Replace => InsertOp::Replace, } } } -impl From for FFI_InsertOp { +impl From for FFiInsertOp { fn from(value: InsertOp) -> Self { match value { - InsertOp::Append => FFI_InsertOp::Append, - InsertOp::Overwrite => FFI_InsertOp::Overwrite, - InsertOp::Replace => FFI_InsertOp::Replace, + InsertOp::Append => FFiInsertOp::Append, + InsertOp::Overwrite => FFiInsertOp::Overwrite, + InsertOp::Replace => FFiInsertOp::Replace, } } } @@ -51,10 +49,10 @@ impl From for FFI_InsertOp { mod tests { use datafusion::logical_expr::dml::InsertOp; - use super::FFI_InsertOp; + use super::FFiInsertOp; fn test_round_trip_insert_op(insert_op: InsertOp) { - let ffi_insert_op: FFI_InsertOp = insert_op.into(); + let ffi_insert_op: FFiInsertOp = insert_op.into(); let round_trip: InsertOp = ffi_insert_op.into(); assert_eq!(insert_op, round_trip); diff --git a/datafusion/ffi/src/lib.rs b/datafusion/ffi/src/lib.rs index d7410e8483735..21b661b0d64a0 100644 --- a/datafusion/ffi/src/lib.rs +++ b/datafusion/ffi/src/lib.rs @@ -32,6 +32,7 @@ pub mod config; pub mod execution; pub mod execution_plan; pub mod expr; +pub mod ffi_option; pub mod insert_op; pub mod 
physical_expr; pub mod plan_properties; diff --git a/datafusion/ffi/src/physical_expr/mod.rs b/datafusion/ffi/src/physical_expr/mod.rs index d268dd613f987..93be4041ea64d 100644 --- a/datafusion/ffi/src/physical_expr/mod.rs +++ b/datafusion/ffi/src/physical_expr/mod.rs @@ -24,8 +24,6 @@ use std::fmt::{Display, Formatter}; use std::hash::{DefaultHasher, Hash, Hasher}; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RResult, RString, RVec}; use arrow::array::{ArrayRef, BooleanArray, RecordBatch}; use arrow::datatypes::SchemaRef; use arrow_schema::ffi::FFI_ArrowSchema; @@ -38,6 +36,9 @@ use datafusion_expr::statistics::Distribution; use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_expr_common::physical_expr::fmt_sql; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; + use crate::arrow_wrappers::{WrappedArray, WrappedSchema}; use crate::expr::columnar_value::FFI_ColumnarValue; use crate::expr::distribution::FFI_Distribution; @@ -46,11 +47,11 @@ use crate::expr::interval::FFI_Interval; use crate::record_batch_stream::{ record_batch_to_wrapped_array, wrapped_array_to_record_batch, }; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiOption, FfiResult}; use crate::{df_result, rresult, rresult_return}; #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_PhysicalExpr { pub data_type: unsafe extern "C" fn( &Self, @@ -74,50 +75,53 @@ pub struct FFI_PhysicalExpr { selection: WrappedArray, ) -> FFIResult, - pub children: unsafe extern "C" fn(&Self) -> RVec, + pub children: unsafe extern "C" fn(&Self) -> StabbyVec, - pub new_with_children: - unsafe extern "C" fn(&Self, children: &RVec) -> FFIResult, + pub new_with_children: unsafe extern "C" fn( + &Self, + children: &StabbyVec, + ) -> FFIResult, pub evaluate_bounds: unsafe extern "C" fn( &Self, - children: RVec, + children: StabbyVec, ) -> FFIResult, pub propagate_constraints: unsafe extern "C" fn( &Self, 
interval: FFI_Interval, - children: RVec, - ) -> FFIResult>>, + children: StabbyVec, + ) -> FFIResult>>, pub evaluate_statistics: unsafe extern "C" fn( &Self, - children: RVec, + children: StabbyVec, ) -> FFIResult, - pub propagate_statistics: - unsafe extern "C" fn( - &Self, - parent: FFI_Distribution, - children: RVec, - ) -> FFIResult>>, + pub propagate_statistics: unsafe extern "C" fn( + &Self, + parent: FFI_Distribution, + children: StabbyVec, + ) -> FFIResult< + FfiOption>, + >, pub get_properties: unsafe extern "C" fn( &Self, - children: RVec, + children: StabbyVec, ) -> FFIResult, - pub fmt_sql: unsafe extern "C" fn(&Self) -> FFIResult, + pub fmt_sql: unsafe extern "C" fn(&Self) -> FFIResult, - pub snapshot: unsafe extern "C" fn(&Self) -> FFIResult>, + pub snapshot: unsafe extern "C" fn(&Self) -> FFIResult>, pub snapshot_generation: unsafe extern "C" fn(&Self) -> u64, pub is_volatile_node: unsafe extern "C" fn(&Self) -> bool, // Display trait - pub display: unsafe extern "C" fn(&Self) -> RString, + pub display: unsafe extern "C" fn(&Self) -> StabbyString, // Hash trait pub hash: unsafe extern "C" fn(&Self) -> u64, @@ -226,7 +230,7 @@ unsafe extern "C" fn evaluate_selection_fn_wrapper( unsafe extern "C" fn children_fn_wrapper( expr: &FFI_PhysicalExpr, -) -> RVec { +) -> StabbyVec { let expr = expr.inner(); let children = expr.children(); children @@ -237,7 +241,7 @@ unsafe extern "C" fn children_fn_wrapper( unsafe extern "C" fn new_with_children_fn_wrapper( expr: &FFI_PhysicalExpr, - children: &RVec, + children: &StabbyVec, ) -> FFIResult { let expr = Arc::clone(expr.inner()); let children = children.iter().map(Into::into).collect::>(); @@ -246,7 +250,7 @@ unsafe extern "C" fn new_with_children_fn_wrapper( unsafe extern "C" fn evaluate_bounds_fn_wrapper( expr: &FFI_PhysicalExpr, - children: RVec, + children: StabbyVec, ) -> FFIResult { let expr = expr.inner(); let children = rresult_return!( @@ -266,8 +270,8 @@ unsafe extern "C" fn 
evaluate_bounds_fn_wrapper( unsafe extern "C" fn propagate_constraints_fn_wrapper( expr: &FFI_PhysicalExpr, interval: FFI_Interval, - children: RVec, -) -> FFIResult>> { + children: StabbyVec, +) -> FFIResult>> { let expr = expr.inner(); let interval = rresult_return!(Interval::try_from(interval)); let children = rresult_return!( @@ -286,16 +290,16 @@ unsafe extern "C" fn propagate_constraints_fn_wrapper( .map(|intervals| intervals .into_iter() .map(FFI_Interval::try_from) - .collect::>>()) + .collect::>>()) .transpose() ); - RResult::ROk(result.into()) + FfiResult::Ok(result.into()) } unsafe extern "C" fn evaluate_statistics_fn_wrapper( expr: &FFI_PhysicalExpr, - children: RVec, + children: StabbyVec, ) -> FFIResult { let expr = expr.inner(); let children = rresult_return!( @@ -314,8 +318,8 @@ unsafe extern "C" fn evaluate_statistics_fn_wrapper( unsafe extern "C" fn propagate_statistics_fn_wrapper( expr: &FFI_PhysicalExpr, parent: FFI_Distribution, - children: RVec, -) -> FFIResult>> { + children: StabbyVec, +) -> FFIResult>> { let expr = expr.inner(); let parent = rresult_return!(Distribution::try_from(parent)); let children = rresult_return!( @@ -332,16 +336,16 @@ unsafe extern "C" fn propagate_statistics_fn_wrapper( .map(|dists| dists .iter() .map(FFI_Distribution::try_from) - .collect::>>()) + .collect::>>()) .transpose() ); - RResult::ROk(result.into()) + FfiResult::Ok(result.into()) } unsafe extern "C" fn get_properties_fn_wrapper( expr: &FFI_PhysicalExpr, - children: RVec, + children: StabbyVec, ) -> FFIResult { let expr = expr.inner(); let children = rresult_return!( @@ -356,15 +360,17 @@ unsafe extern "C" fn get_properties_fn_wrapper( ) } -unsafe extern "C" fn fmt_sql_fn_wrapper(expr: &FFI_PhysicalExpr) -> FFIResult { +unsafe extern "C" fn fmt_sql_fn_wrapper( + expr: &FFI_PhysicalExpr, +) -> FFIResult { let expr = expr.inner(); let result = fmt_sql(expr.as_ref()).to_string(); - RResult::ROk(result.into()) + FfiResult::Ok(result.into()) } unsafe extern "C" 
fn snapshot_fn_wrapper( expr: &FFI_PhysicalExpr, -) -> FFIResult> { +) -> FFIResult> { let expr = expr.inner(); rresult!( expr.snapshot() @@ -381,7 +387,7 @@ unsafe extern "C" fn is_volatile_node_fn_wrapper(expr: &FFI_PhysicalExpr) -> boo let expr = expr.inner(); expr.is_volatile_node() } -unsafe extern "C" fn display_fn_wrapper(expr: &FFI_PhysicalExpr) -> RString { +unsafe extern "C" fn display_fn_wrapper(expr: &FFI_PhysicalExpr) -> StabbyString { let expr = expr.inner(); format!("{expr}").into() } @@ -595,7 +601,7 @@ impl PhysicalExpr for ForeignPhysicalExpr { let children = children .iter() .map(|interval| FFI_Interval::try_from(*interval)) - .collect::>>()?; + .collect::>>()?; df_result!((self.expr.evaluate_bounds)(&self.expr, children)) .and_then(Interval::try_from) } @@ -611,7 +617,7 @@ impl PhysicalExpr for ForeignPhysicalExpr { let children = children .iter() .map(|interval| FFI_Interval::try_from(*interval)) - .collect::>>()?; + .collect::>>()?; let result = df_result!((self.expr.propagate_constraints)( &self.expr, interval, children ))?; @@ -633,7 +639,7 @@ impl PhysicalExpr for ForeignPhysicalExpr { let children = children .iter() .map(|dist| FFI_Distribution::try_from(*dist)) - .collect::>>()?; + .collect::>>()?; let result = df_result!((self.expr.evaluate_statistics)(&self.expr, children))?; @@ -651,7 +657,7 @@ impl PhysicalExpr for ForeignPhysicalExpr { let children = children .iter() .map(|dist| FFI_Distribution::try_from(*dist)) - .collect::>>()?; + .collect::>>()?; let result = df_result!((self.expr.propagate_statistics)( &self.expr, parent, children ))?; @@ -674,7 +680,7 @@ impl PhysicalExpr for ForeignPhysicalExpr { let children = children .iter() .map(FFI_ExprProperties::try_from) - .collect::>>()?; + .collect::>>()?; df_result!((self.expr.get_properties)(&self.expr, children)) .and_then(ExprProperties::try_from) } @@ -683,8 +689,8 @@ impl PhysicalExpr for ForeignPhysicalExpr { fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 
unsafe { match (self.expr.fmt_sql)(&self.expr) { - RResult::ROk(sql) => write!(f, "{sql}"), - RResult::RErr(_) => Err(std::fmt::Error), + FfiResult::Ok(sql) => write!(f, "{sql}"), + FfiResult::Err(_) => Err(std::fmt::Error), } } } diff --git a/datafusion/ffi/src/physical_expr/partitioning.rs b/datafusion/ffi/src/physical_expr/partitioning.rs index cda4fd2c97f45..58ab3e931b90b 100644 --- a/datafusion/ffi/src/physical_expr/partitioning.rs +++ b/datafusion/ffi/src/physical_expr/partitioning.rs @@ -17,20 +17,19 @@ use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::RVec; use datafusion_physical_expr::Partitioning; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use stabby::vec::Vec as StabbyVec; use crate::physical_expr::FFI_PhysicalExpr; /// A stable struct for sharing [`Partitioning`] across FFI boundaries. /// See ['Partitioning'] for the meaning of each variant. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub enum FFI_Partitioning { RoundRobinBatch(usize), - Hash(RVec, usize), + Hash(StabbyVec, usize), UnknownPartitioning(usize), } diff --git a/datafusion/ffi/src/physical_expr/sort.rs b/datafusion/ffi/src/physical_expr/sort.rs index fd3339b10555a..fc8e2a81f36eb 100644 --- a/datafusion/ffi/src/physical_expr/sort.rs +++ b/datafusion/ffi/src/physical_expr/sort.rs @@ -17,7 +17,6 @@ use std::sync::Arc; -use abi_stable::StableAbi; use arrow_schema::SortOptions; use datafusion_physical_expr::PhysicalSortExpr; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; @@ -28,7 +27,7 @@ use crate::physical_expr::FFI_PhysicalExpr; /// A stable struct for sharing [`PhysicalSortExpr`] across FFI boundaries. /// See [`PhysicalSortExpr`] for the meaning of each field. 
#[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_PhysicalSortExpr { expr: FFI_PhysicalExpr, options: FFI_SortOptions, diff --git a/datafusion/ffi/src/plan_properties.rs b/datafusion/ffi/src/plan_properties.rs index d009de3f04b99..9010336f23caa 100644 --- a/datafusion/ffi/src/plan_properties.rs +++ b/datafusion/ffi/src/plan_properties.rs @@ -18,8 +18,6 @@ use std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RVec}; use arrow::datatypes::SchemaRef; use datafusion_common::error::{DataFusionError, Result}; use datafusion_physical_expr::EquivalenceProperties; @@ -27,26 +25,29 @@ use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; use datafusion_physical_plan::PlanProperties; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; +use stabby::vec::Vec as StabbyVec; + use crate::arrow_wrappers::WrappedSchema; use crate::physical_expr::partitioning::FFI_Partitioning; use crate::physical_expr::sort::FFI_PhysicalSortExpr; +use crate::util::FfiOption; /// A stable struct for sharing [`PlanProperties`] across FFI boundaries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_PlanProperties { /// The output partitioning of the plan. pub output_partitioning: unsafe extern "C" fn(plan: &Self) -> FFI_Partitioning, /// Return the emission type of the plan. - pub emission_type: unsafe extern "C" fn(plan: &Self) -> FFI_EmissionType, + pub emission_type: unsafe extern "C" fn(plan: &Self) -> FfiEmissionType, /// Indicate boundedness of the plan and its memory requirements. pub boundedness: unsafe extern "C" fn(plan: &Self) -> FFI_Boundedness, /// The output ordering of the plan. pub output_ordering: - unsafe extern "C" fn(plan: &Self) -> ROption>, + unsafe extern "C" fn(plan: &Self) -> FfiOption>, /// Return the schema of the plan. 
pub schema: unsafe extern "C" fn(plan: &Self) -> WrappedSchema, @@ -83,7 +84,7 @@ unsafe extern "C" fn output_partitioning_fn_wrapper( unsafe extern "C" fn emission_type_fn_wrapper( properties: &FFI_PlanProperties, -) -> FFI_EmissionType { +) -> FfiEmissionType { properties.inner().emission_type.into() } @@ -95,8 +96,8 @@ unsafe extern "C" fn boundedness_fn_wrapper( unsafe extern "C" fn output_ordering_fn_wrapper( properties: &FFI_PlanProperties, -) -> ROption> { - let ordering: Option> = +) -> FfiOption> { + let ordering: Option> = properties.inner().output_ordering().map(|lex_ordering| { let vec_ordering: Vec = lex_ordering.clone().into(); vec_ordering @@ -159,7 +160,7 @@ impl TryFrom for PlanProperties { let ffi_schema = unsafe { (ffi_props.schema)(&ffi_props) }; let schema = (&ffi_schema.0).try_into()?; - let ffi_orderings: Option> = + let ffi_orderings: Option> = unsafe { (ffi_props.output_ordering)(&ffi_props) }.into(); let sort_exprs = ffi_orderings .map(|ordering_vec| { @@ -194,8 +195,8 @@ impl TryFrom for PlanProperties { } /// FFI safe version of [`Boundedness`]. -#[repr(C)] -#[derive(Clone, StableAbi)] +#[repr(C, u8)] +#[derive(Clone)] pub enum FFI_Boundedness { Bounded, Unbounded { requires_infinite_memory: bool }, @@ -228,30 +229,30 @@ impl From for Boundedness { } /// FFI safe version of [`EmissionType`]. 
-#[repr(C)] -#[derive(Clone, StableAbi)] -pub enum FFI_EmissionType { +#[repr(u8)] +#[derive(Clone)] +pub enum FfiEmissionType { Incremental, Final, Both, } -impl From for FFI_EmissionType { +impl From for FfiEmissionType { fn from(value: EmissionType) -> Self { match value { - EmissionType::Incremental => FFI_EmissionType::Incremental, - EmissionType::Final => FFI_EmissionType::Final, - EmissionType::Both => FFI_EmissionType::Both, + EmissionType::Incremental => FfiEmissionType::Incremental, + EmissionType::Final => FfiEmissionType::Final, + EmissionType::Both => FfiEmissionType::Both, } } } -impl From for EmissionType { - fn from(value: FFI_EmissionType) -> Self { +impl From for EmissionType { + fn from(value: FfiEmissionType) -> Self { match value { - FFI_EmissionType::Incremental => EmissionType::Incremental, - FFI_EmissionType::Final => EmissionType::Final, - FFI_EmissionType::Both => EmissionType::Both, + FfiEmissionType::Incremental => EmissionType::Incremental, + FfiEmissionType::Final => EmissionType::Final, + FfiEmissionType::Both => EmissionType::Both, } } } diff --git a/datafusion/ffi/src/proto/logical_extension_codec.rs b/datafusion/ffi/src/proto/logical_extension_codec.rs index 3781a40539ed1..00cef70af3324 100644 --- a/datafusion/ffi/src/proto/logical_extension_codec.rs +++ b/datafusion/ffi/src/proto/logical_extension_codec.rs @@ -18,8 +18,6 @@ use std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{RResult, RSlice, RStr, RVec}; use arrow::datatypes::SchemaRef; use datafusion_catalog::TableProvider; use datafusion_common::error::Result; @@ -33,6 +31,10 @@ use datafusion_expr::{ use datafusion_proto::logical_plan::{ DefaultLogicalExtensionCodec, LogicalExtensionCodec, }; + +use stabby::slice::Slice as StabbySlice; +use stabby::str::Str as StabbyStr; +use stabby::vec::Vec as StabbyVec; use tokio::runtime::Handle; use crate::arrow_wrappers::WrappedSchema; @@ -41,60 +43,60 @@ use 
crate::table_provider::FFI_TableProvider; use crate::udaf::FFI_AggregateUDF; use crate::udf::FFI_ScalarUDF; use crate::udwf::FFI_WindowUDF; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiResult}; use crate::{df_result, rresult_return}; /// A stable struct for sharing [`LogicalExtensionCodec`] across FFI boundaries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_LogicalExtensionCodec { /// Decode bytes into a table provider. try_decode_table_provider: unsafe extern "C" fn( &Self, - buf: RSlice, - table_ref: RStr, + buf: StabbySlice, + table_ref: StabbyStr, schema: WrappedSchema, ) -> FFIResult, /// Encode a table provider into bytes. try_encode_table_provider: unsafe extern "C" fn( &Self, - table_ref: RStr, + table_ref: StabbyStr, node: FFI_TableProvider, - ) -> FFIResult>, + ) -> FFIResult>, /// Decode bytes into a user defined scalar function. try_decode_udf: unsafe extern "C" fn( &Self, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult, /// Encode a user defined scalar function into bytes. try_encode_udf: - unsafe extern "C" fn(&Self, node: FFI_ScalarUDF) -> FFIResult>, + unsafe extern "C" fn(&Self, node: FFI_ScalarUDF) -> FFIResult>, /// Decode bytes into a user defined aggregate function. try_decode_udaf: unsafe extern "C" fn( &Self, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult, /// Encode a user defined aggregate function into bytes. try_encode_udaf: - unsafe extern "C" fn(&Self, node: FFI_AggregateUDF) -> FFIResult>, + unsafe extern "C" fn(&Self, node: FFI_AggregateUDF) -> FFIResult>, /// Decode bytes into a user defined window function. try_decode_udwf: unsafe extern "C" fn( &Self, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult, /// Encode a user defined window function into bytes. 
try_encode_udwf: - unsafe extern "C" fn(&Self, node: FFI_WindowUDF) -> FFIResult>, + unsafe extern "C" fn(&Self, node: FFI_WindowUDF) -> FFIResult>, pub task_ctx_provider: FFI_TaskContextProvider, @@ -143,8 +145,8 @@ impl FFI_LogicalExtensionCodec { unsafe extern "C" fn try_decode_table_provider_fn_wrapper( codec: &FFI_LogicalExtensionCodec, - buf: RSlice, - table_ref: RStr, + buf: StabbySlice, + table_ref: StabbyStr, schema: WrappedSchema, ) -> FFIResult { let ctx = rresult_return!(codec.task_ctx()); @@ -160,7 +162,7 @@ unsafe extern "C" fn try_decode_table_provider_fn_wrapper( ctx.as_ref() )); - RResult::ROk(FFI_TableProvider::new_with_ffi_codec( + FfiResult::Ok(FFI_TableProvider::new_with_ffi_codec( table_provider, true, runtime, @@ -170,9 +172,9 @@ unsafe extern "C" fn try_decode_table_provider_fn_wrapper( unsafe extern "C" fn try_encode_table_provider_fn_wrapper( codec: &FFI_LogicalExtensionCodec, - table_ref: RStr, + table_ref: StabbyStr, node: FFI_TableProvider, -) -> FFIResult> { +) -> FFIResult> { let table_ref = TableReference::from(table_ref.as_str()); let table_provider: Arc = (&node).into(); let codec = codec.inner(); @@ -184,26 +186,26 @@ unsafe extern "C" fn try_encode_table_provider_fn_wrapper( &mut bytes )); - RResult::ROk(bytes.into()) + FfiResult::Ok(bytes.into_iter().collect()) } unsafe extern "C" fn try_decode_udf_fn_wrapper( codec: &FFI_LogicalExtensionCodec, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult { let codec = codec.inner(); let udf = rresult_return!(codec.try_decode_udf(name.as_str(), buf.as_ref())); let udf = FFI_ScalarUDF::from(udf); - RResult::ROk(udf) + FfiResult::Ok(udf) } unsafe extern "C" fn try_encode_udf_fn_wrapper( codec: &FFI_LogicalExtensionCodec, node: FFI_ScalarUDF, -) -> FFIResult> { +) -> FFIResult> { let codec = codec.inner(); let node: Arc = (&node).into(); let node = ScalarUDF::new_from_shared_impl(node); @@ -211,25 +213,25 @@ unsafe extern "C" fn try_encode_udf_fn_wrapper( let 
mut bytes = Vec::new(); rresult_return!(codec.try_encode_udf(&node, &mut bytes)); - RResult::ROk(bytes.into()) + FfiResult::Ok(bytes.into_iter().collect()) } unsafe extern "C" fn try_decode_udaf_fn_wrapper( codec: &FFI_LogicalExtensionCodec, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult { let codec_inner = codec.inner(); let udaf = rresult_return!(codec_inner.try_decode_udaf(name.into(), buf.as_ref())); let udaf = FFI_AggregateUDF::from(udaf); - RResult::ROk(udaf) + FfiResult::Ok(udaf) } unsafe extern "C" fn try_encode_udaf_fn_wrapper( codec: &FFI_LogicalExtensionCodec, node: FFI_AggregateUDF, -) -> FFIResult> { +) -> FFIResult> { let codec = codec.inner(); let udaf: Arc = (&node).into(); let udaf = AggregateUDF::new_from_shared_impl(udaf); @@ -237,25 +239,25 @@ unsafe extern "C" fn try_encode_udaf_fn_wrapper( let mut bytes = Vec::new(); rresult_return!(codec.try_encode_udaf(&udaf, &mut bytes)); - RResult::ROk(bytes.into()) + FfiResult::Ok(bytes.into_iter().collect()) } unsafe extern "C" fn try_decode_udwf_fn_wrapper( codec: &FFI_LogicalExtensionCodec, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult { let codec = codec.inner(); let udwf = rresult_return!(codec.try_decode_udwf(name.into(), buf.as_ref())); let udwf = FFI_WindowUDF::from(udwf); - RResult::ROk(udwf) + FfiResult::Ok(udwf) } unsafe extern "C" fn try_encode_udwf_fn_wrapper( codec: &FFI_LogicalExtensionCodec, node: FFI_WindowUDF, -) -> FFIResult> { +) -> FFIResult> { let codec = codec.inner(); let udwf: Arc = (&node).into(); let udwf = WindowUDF::new_from_shared_impl(udwf); @@ -263,7 +265,7 @@ unsafe extern "C" fn try_encode_udwf_fn_wrapper( let mut bytes = Vec::new(); rresult_return!(codec.try_encode_udwf(&udwf, &mut bytes)); - RResult::ROk(bytes.into()) + FfiResult::Ok(bytes.into_iter().collect()) } unsafe extern "C" fn release_fn_wrapper(provider: &mut FFI_LogicalExtensionCodec) { diff --git 
a/datafusion/ffi/src/proto/physical_extension_codec.rs b/datafusion/ffi/src/proto/physical_extension_codec.rs index 0577e72366478..f3cc1c6d6022d 100644 --- a/datafusion/ffi/src/proto/physical_extension_codec.rs +++ b/datafusion/ffi/src/proto/physical_extension_codec.rs @@ -18,8 +18,6 @@ use std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{RResult, RSlice, RStr, RVec}; use datafusion_common::error::Result; use datafusion_execution::TaskContext; use datafusion_expr::{ @@ -27,6 +25,10 @@ use datafusion_expr::{ }; use datafusion_physical_plan::ExecutionPlan; use datafusion_proto::physical_plan::PhysicalExtensionCodec; + +use stabby::slice::Slice as StabbySlice; +use stabby::str::Str as StabbyStr; +use stabby::vec::Vec as StabbyVec; use tokio::runtime::Handle; use crate::execution::FFI_TaskContextProvider; @@ -34,56 +36,56 @@ use crate::execution_plan::FFI_ExecutionPlan; use crate::udaf::FFI_AggregateUDF; use crate::udf::FFI_ScalarUDF; use crate::udwf::FFI_WindowUDF; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiResult}; use crate::{df_result, rresult_return}; /// A stable struct for sharing [`PhysicalExtensionCodec`] across FFI boundaries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_PhysicalExtensionCodec { /// Decode bytes into an execution plan. try_decode: unsafe extern "C" fn( &Self, - buf: RSlice, - inputs: RVec, + buf: StabbySlice, + inputs: StabbyVec, ) -> FFIResult, /// Encode an execution plan into bytes. try_encode: - unsafe extern "C" fn(&Self, node: FFI_ExecutionPlan) -> FFIResult>, + unsafe extern "C" fn(&Self, node: FFI_ExecutionPlan) -> FFIResult>, /// Decode bytes into a user defined scalar function. try_decode_udf: unsafe extern "C" fn( &Self, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult, /// Encode a user defined scalar function into bytes. 
try_encode_udf: - unsafe extern "C" fn(&Self, node: FFI_ScalarUDF) -> FFIResult>, + unsafe extern "C" fn(&Self, node: FFI_ScalarUDF) -> FFIResult>, /// Decode bytes into a user defined aggregate function. try_decode_udaf: unsafe extern "C" fn( &Self, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult, /// Encode a user defined aggregate function into bytes. try_encode_udaf: - unsafe extern "C" fn(&Self, node: FFI_AggregateUDF) -> FFIResult>, + unsafe extern "C" fn(&Self, node: FFI_AggregateUDF) -> FFIResult>, /// Decode bytes into a user defined window function. try_decode_udwf: unsafe extern "C" fn( &Self, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult, /// Encode a user defined window function into bytes. try_encode_udwf: - unsafe extern "C" fn(&Self, node: FFI_WindowUDF) -> FFIResult>, + unsafe extern "C" fn(&Self, node: FFI_WindowUDF) -> FFIResult>, /// Access the current [`TaskContext`]. task_ctx_provider: FFI_TaskContextProvider, @@ -129,8 +131,8 @@ impl FFI_PhysicalExtensionCodec { unsafe extern "C" fn try_decode_fn_wrapper( codec: &FFI_PhysicalExtensionCodec, - buf: RSlice, - inputs: RVec, + buf: StabbySlice, + inputs: StabbyVec, ) -> FFIResult { let task_ctx: Arc = rresult_return!((&codec.task_ctx_provider).try_into()); @@ -144,13 +146,13 @@ unsafe extern "C" fn try_decode_fn_wrapper( let plan = rresult_return!(codec.try_decode(buf.as_ref(), &inputs, task_ctx.as_ref())); - RResult::ROk(FFI_ExecutionPlan::new(plan, None)) + FfiResult::Ok(FFI_ExecutionPlan::new(plan, None)) } unsafe extern "C" fn try_encode_fn_wrapper( codec: &FFI_PhysicalExtensionCodec, node: FFI_ExecutionPlan, -) -> FFIResult> { +) -> FFIResult> { let codec = codec.inner(); let plan: Arc = rresult_return!((&node).try_into()); @@ -158,26 +160,26 @@ unsafe extern "C" fn try_encode_fn_wrapper( let mut bytes = Vec::new(); rresult_return!(codec.try_encode(plan, &mut bytes)); - RResult::ROk(bytes.into()) + 
FfiResult::Ok(bytes.into_iter().collect()) } unsafe extern "C" fn try_decode_udf_fn_wrapper( codec: &FFI_PhysicalExtensionCodec, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult { let codec = codec.inner(); let udf = rresult_return!(codec.try_decode_udf(name.as_str(), buf.as_ref())); let udf = FFI_ScalarUDF::from(udf); - RResult::ROk(udf) + FfiResult::Ok(udf) } unsafe extern "C" fn try_encode_udf_fn_wrapper( codec: &FFI_PhysicalExtensionCodec, node: FFI_ScalarUDF, -) -> FFIResult> { +) -> FFIResult> { let codec = codec.inner(); let node: Arc = (&node).into(); let node = ScalarUDF::new_from_shared_impl(node); @@ -185,25 +187,25 @@ unsafe extern "C" fn try_encode_udf_fn_wrapper( let mut bytes = Vec::new(); rresult_return!(codec.try_encode_udf(&node, &mut bytes)); - RResult::ROk(bytes.into()) + FfiResult::Ok(bytes.into_iter().collect()) } unsafe extern "C" fn try_decode_udaf_fn_wrapper( codec: &FFI_PhysicalExtensionCodec, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult { let codec_inner = codec.inner(); let udaf = rresult_return!(codec_inner.try_decode_udaf(name.into(), buf.as_ref())); let udaf = FFI_AggregateUDF::from(udaf); - RResult::ROk(udaf) + FfiResult::Ok(udaf) } unsafe extern "C" fn try_encode_udaf_fn_wrapper( codec: &FFI_PhysicalExtensionCodec, node: FFI_AggregateUDF, -) -> FFIResult> { +) -> FFIResult> { let codec = codec.inner(); let udaf: Arc = (&node).into(); let udaf = AggregateUDF::new_from_shared_impl(udaf); @@ -211,25 +213,25 @@ unsafe extern "C" fn try_encode_udaf_fn_wrapper( let mut bytes = Vec::new(); rresult_return!(codec.try_encode_udaf(&udaf, &mut bytes)); - RResult::ROk(bytes.into()) + FfiResult::Ok(bytes.into_iter().collect()) } unsafe extern "C" fn try_decode_udwf_fn_wrapper( codec: &FFI_PhysicalExtensionCodec, - name: RStr, - buf: RSlice, + name: StabbyStr, + buf: StabbySlice, ) -> FFIResult { let codec = codec.inner(); let udwf = 
rresult_return!(codec.try_decode_udwf(name.into(), buf.as_ref())); let udwf = FFI_WindowUDF::from(udwf); - RResult::ROk(udwf) + FfiResult::Ok(udwf) } unsafe extern "C" fn try_encode_udwf_fn_wrapper( codec: &FFI_PhysicalExtensionCodec, node: FFI_WindowUDF, -) -> FFIResult> { +) -> FFIResult> { let codec = codec.inner(); let udwf: Arc = (&node).into(); let udwf = WindowUDF::new_from_shared_impl(udwf); @@ -237,7 +239,7 @@ unsafe extern "C" fn try_encode_udwf_fn_wrapper( let mut bytes = Vec::new(); rresult_return!(codec.try_encode_udwf(&udwf, &mut bytes)); - RResult::ROk(bytes.into()) + FfiResult::Ok(bytes.into_iter().collect()) } unsafe extern "C" fn release_fn_wrapper(provider: &mut FFI_PhysicalExtensionCodec) { diff --git a/datafusion/ffi/src/record_batch_stream.rs b/datafusion/ffi/src/record_batch_stream.rs index 53078a0e4bbae..e56c977bf6e0f 100644 --- a/datafusion/ffi/src/record_batch_stream.rs +++ b/datafusion/ffi/src/record_batch_stream.rs @@ -18,31 +18,31 @@ use std::ffi::c_void; use std::task::Poll; -use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RResult}; use arrow::array::{Array, RecordBatch, StructArray, make_array}; use arrow::ffi::{from_ffi, to_ffi}; use async_ffi::{ContextExt, FfiContext, FfiPoll}; use datafusion_common::{DataFusionError, Result, ffi_datafusion_err, ffi_err}; use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; use futures::{Stream, TryStreamExt}; + use tokio::runtime::Handle; use crate::arrow_wrappers::{WrappedArray, WrappedSchema}; use crate::rresult; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiOption, FfiResult}; /// A stable struct for sharing [`RecordBatchStream`] across FFI boundaries. /// We use the async-ffi crate for handling async calls across libraries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_RecordBatchStream { /// This mirrors the `poll_next` of [`RecordBatchStream`] but does so /// in a FFI safe manner. 
pub poll_next: unsafe extern "C" fn( stream: &Self, cx: &mut FfiContext, - ) -> FfiPoll>>, + ) + -> FfiPoll>>, /// Return the schema of the record batch pub schema: unsafe extern "C" fn(stream: &Self) -> WrappedSchema, @@ -116,20 +116,20 @@ pub(crate) fn record_batch_to_wrapped_array( // probably want to use pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result { fn maybe_record_batch_to_wrapped_stream( record_batch: Option>, -) -> ROption> { +) -> FfiOption> { match record_batch { Some(Ok(record_batch)) => { - ROption::RSome(record_batch_to_wrapped_array(record_batch)) + FfiOption::Some(record_batch_to_wrapped_array(record_batch)) } - Some(Err(e)) => ROption::RSome(RResult::RErr(e.to_string().into())), - None => ROption::RNone, + Some(Err(e)) => FfiOption::Some(FfiResult::Err(e.to_string().into())), + None => FfiOption::None, } } unsafe extern "C" fn poll_next_fn_wrapper( stream: &FFI_RecordBatchStream, cx: &mut FfiContext, -) -> FfiPoll>> { +) -> FfiPoll>> { unsafe { let private_data = stream.private_data as *mut RecordBatchStreamPrivateData; let stream = &mut (*private_data).rbs; @@ -171,14 +171,18 @@ pub(crate) fn wrapped_array_to_record_batch(array: WrappedArray) -> Result>, + array: FfiOption>, ) -> Option> { + let array: Option> = array.into(); match array { - ROption::RSome(RResult::ROk(wrapped_array)) => { - Some(wrapped_array_to_record_batch(wrapped_array)) + Some(result) => { + let result: std::result::Result = result.into(); + match result { + Ok(wrapped_array) => Some(wrapped_array_to_record_batch(wrapped_array)), + Err(e) => Some(ffi_err!("{e}")), + } } - ROption::RSome(RResult::RErr(e)) => Some(ffi_err!("{e}")), - ROption::RNone => None, + None => None, } } diff --git a/datafusion/ffi/src/schema_provider.rs b/datafusion/ffi/src/schema_provider.rs index b8e44b134f87b..8d4f0a997a42c 100644 --- a/datafusion/ffi/src/schema_provider.rs +++ b/datafusion/ffi/src/schema_provider.rs @@ -19,8 +19,6 @@ use std::any::Any; use 
std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RResult, RString, RVec}; use async_ffi::{FfiFuture, FutureExt}; use async_trait::async_trait; use datafusion_catalog::{SchemaProvider, TableProvider}; @@ -28,42 +26,44 @@ use datafusion_common::error::{DataFusionError, Result}; use datafusion_proto::logical_plan::{ DefaultLogicalExtensionCodec, LogicalExtensionCodec, }; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; use tokio::runtime::Handle; use crate::execution::FFI_TaskContextProvider; use crate::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use crate::table_provider::{FFI_TableProvider, ForeignTableProvider}; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiOption, FfiResult}; use crate::{df_result, rresult_return}; /// A stable struct for sharing [`SchemaProvider`] across FFI boundaries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_SchemaProvider { - pub owner_name: ROption, + pub owner_name: FfiOption, - pub table_names: unsafe extern "C" fn(provider: &Self) -> RVec, + pub table_names: unsafe extern "C" fn(provider: &Self) -> StabbyVec, pub table: unsafe extern "C" fn( provider: &Self, - name: RString, + name: StabbyString, ) - -> FfiFuture>>, + -> FfiFuture>>, pub register_table: unsafe extern "C" fn( provider: &Self, - name: RString, + name: StabbyString, table: FFI_TableProvider, ) - -> FFIResult>, + -> FFIResult>, pub deregister_table: unsafe extern "C" fn( provider: &Self, - name: RString, + name: StabbyString, ) - -> FFIResult>, + -> FFIResult>, - pub table_exist: unsafe extern "C" fn(provider: &Self, name: RString) -> bool, + pub table_exist: unsafe extern "C" fn(provider: &Self, name: StabbyString) -> bool, pub logical_codec: FFI_LogicalExtensionCodec, @@ -113,7 +113,7 @@ impl FFI_SchemaProvider { unsafe extern "C" fn table_names_fn_wrapper( provider: &FFI_SchemaProvider, -) -> RVec { +) -> StabbyVec { unsafe { 
let provider = provider.inner(); @@ -124,8 +124,8 @@ unsafe extern "C" fn table_names_fn_wrapper( unsafe extern "C" fn table_fn_wrapper( provider: &FFI_SchemaProvider, - name: RString, -) -> FfiFuture>> { + name: StabbyString, +) -> FfiFuture>> { unsafe { let runtime = provider.runtime(); let logical_codec = provider.logical_codec.clone(); @@ -138,7 +138,7 @@ unsafe extern "C" fn table_fn_wrapper( }) .into(); - RResult::ROk(table) + FfiResult::Ok(table) } .into_ffi() } @@ -146,9 +146,9 @@ unsafe extern "C" fn table_fn_wrapper( unsafe extern "C" fn register_table_fn_wrapper( provider: &FFI_SchemaProvider, - name: RString, + name: StabbyString, table: FFI_TableProvider, -) -> FFIResult> { +) -> FFIResult> { unsafe { let runtime = provider.runtime(); let logical_codec = provider.logical_codec.clone(); @@ -161,14 +161,14 @@ unsafe extern "C" fn register_table_fn_wrapper( FFI_TableProvider::new_with_ffi_codec(t, true, runtime, logical_codec) }); - RResult::ROk(returned_table.into()) + FfiResult::Ok(returned_table.into()) } } unsafe extern "C" fn deregister_table_fn_wrapper( provider: &FFI_SchemaProvider, - name: RString, -) -> FFIResult> { + name: StabbyString, +) -> FFIResult> { unsafe { let runtime = provider.runtime(); let logical_codec = provider.logical_codec.clone(); @@ -179,13 +179,13 @@ unsafe extern "C" fn deregister_table_fn_wrapper( FFI_TableProvider::new_with_ffi_codec(t, true, runtime, logical_codec) }); - RResult::ROk(returned_table.into()) + FfiResult::Ok(returned_table.into()) } } unsafe extern "C" fn table_exist_fn_wrapper( provider: &FFI_SchemaProvider, - name: RString, + name: StabbyString, ) -> bool { unsafe { provider.inner().table_exist(name.as_str()) } } @@ -313,7 +313,7 @@ impl SchemaProvider for ForeignSchemaProvider { } fn owner_name(&self) -> Option<&str> { - let name: Option<&RString> = self.0.owner_name.as_ref().into(); + let name: Option<&StabbyString> = self.0.owner_name.as_ref(); name.map(|s| s.as_str()) } diff --git 
a/datafusion/ffi/src/session/config.rs b/datafusion/ffi/src/session/config.rs index 63f0f20ecc7d5..fca0190c07138 100644 --- a/datafusion/ffi/src/session/config.rs +++ b/datafusion/ffi/src/session/config.rs @@ -18,7 +18,6 @@ use std::ffi::c_void; use crate::config::FFI_ConfigOptions; -use abi_stable::StableAbi; use datafusion_common::config::ConfigOptions; use datafusion_common::error::{DataFusionError, Result}; use datafusion_execution::config::SessionConfig; @@ -35,7 +34,7 @@ use datafusion_execution::config::SessionConfig; /// SessionConfig via a FFI interface would be extensive and provide limited /// value over this version. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_SessionConfig { /// FFI stable configuration options. pub config_options: FFI_ConfigOptions, diff --git a/datafusion/ffi/src/session/mod.rs b/datafusion/ffi/src/session/mod.rs index aa910abb9149a..0b3ef4d48c497 100644 --- a/datafusion/ffi/src/session/mod.rs +++ b/datafusion/ffi/src/session/mod.rs @@ -20,13 +20,11 @@ use std::collections::HashMap; use std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{RHashMap, RResult, RStr, RString, RVec}; use arrow_schema::SchemaRef; use arrow_schema::ffi::FFI_ArrowSchema; use async_ffi::{FfiFuture, FutureExt}; use async_trait::async_trait; -use datafusion_common::config::{ConfigOptions, TableOptions}; +use datafusion_common::config::{ConfigFileType, ConfigOptions, TableOptions}; use datafusion_common::{DFSchema, DataFusionError}; use datafusion_execution::TaskContext; use datafusion_execution::config::SessionConfig; @@ -45,6 +43,10 @@ use datafusion_proto::logical_plan::to_proto::serialize_expr; use datafusion_proto::protobuf::LogicalExprNode; use datafusion_session::Session; use prost::Message; + +use stabby::str::Str as StabbyStr; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; use tokio::runtime::Handle; use crate::arrow_wrappers::WrappedSchema; @@ -56,7 
+58,7 @@ use crate::session::config::FFI_SessionConfig; use crate::udaf::FFI_AggregateUDF; use crate::udf::FFI_ScalarUDF; use crate::udwf::FFI_WindowUDF; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiResult}; use crate::{df_result, rresult, rresult_return}; pub mod config; @@ -74,34 +76,37 @@ pub mod config; /// which has methods that require `&dyn Session`. For usage within this crate /// we know the [`Session`] lifetimes are valid. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub(crate) struct FFI_SessionRef { - session_id: unsafe extern "C" fn(&Self) -> RStr, + session_id: unsafe extern "C" fn(&Self) -> StabbyStr, config: unsafe extern "C" fn(&Self) -> FFI_SessionConfig, create_physical_plan: unsafe extern "C" fn( &Self, - logical_plan_serialized: RVec, + logical_plan_serialized: StabbyVec, ) -> FfiFuture>, create_physical_expr: unsafe extern "C" fn( &Self, - expr_serialized: RVec, + expr_serialized: StabbyVec, schema: WrappedSchema, ) -> FFIResult, - scalar_functions: unsafe extern "C" fn(&Self) -> RHashMap, + scalar_functions: + unsafe extern "C" fn(&Self) -> StabbyVec<(StabbyString, FFI_ScalarUDF)>, aggregate_functions: - unsafe extern "C" fn(&Self) -> RHashMap, + unsafe extern "C" fn(&Self) -> StabbyVec<(StabbyString, FFI_AggregateUDF)>, - window_functions: unsafe extern "C" fn(&Self) -> RHashMap, + window_functions: + unsafe extern "C" fn(&Self) -> StabbyVec<(StabbyString, FFI_WindowUDF)>, - table_options: unsafe extern "C" fn(&Self) -> RHashMap, + table_options: unsafe extern "C" fn(&Self) -> StabbyVec<(StabbyString, StabbyString)>, - default_table_options: unsafe extern "C" fn(&Self) -> RHashMap, + default_table_options: + unsafe extern "C" fn(&Self) -> StabbyVec<(StabbyString, StabbyString)>, task_ctx: unsafe extern "C" fn(&Self) -> FFI_TaskContext, @@ -148,7 +153,7 @@ impl FFI_SessionRef { } } -unsafe extern "C" fn session_id_fn_wrapper(session: &FFI_SessionRef) -> RStr<'_> { +unsafe extern "C" fn 
session_id_fn_wrapper(session: &FFI_SessionRef) -> StabbyStr<'_> { let session = session.inner(); session.session_id().into() } @@ -160,7 +165,7 @@ unsafe extern "C" fn config_fn_wrapper(session: &FFI_SessionRef) -> FFI_SessionC unsafe extern "C" fn create_physical_plan_fn_wrapper( session: &FFI_SessionRef, - logical_plan_serialized: RVec, + logical_plan_serialized: StabbyVec, ) -> FfiFuture> { unsafe { let runtime = session.runtime().clone(); @@ -184,7 +189,7 @@ unsafe extern "C" fn create_physical_plan_fn_wrapper( unsafe extern "C" fn create_physical_expr_fn_wrapper( session: &FFI_SessionRef, - expr_serialized: RVec, + expr_serialized: StabbyVec, schema: WrappedSchema, ) -> FFIResult { let codec: Arc = (&session.logical_codec).into(); @@ -199,12 +204,12 @@ unsafe extern "C" fn create_physical_expr_fn_wrapper( let physical_expr = rresult_return!(session.create_physical_expr(logical_expr, &schema)); - RResult::ROk(physical_expr.into()) + FfiResult::Ok(physical_expr.into()) } unsafe extern "C" fn scalar_functions_fn_wrapper( session: &FFI_SessionRef, -) -> RHashMap { +) -> StabbyVec<(StabbyString, FFI_ScalarUDF)> { let session = session.inner(); session .scalar_functions() @@ -215,7 +220,7 @@ unsafe extern "C" fn scalar_functions_fn_wrapper( unsafe extern "C" fn aggregate_functions_fn_wrapper( session: &FFI_SessionRef, -) -> RHashMap { +) -> StabbyVec<(StabbyString, FFI_AggregateUDF)> { let session = session.inner(); session .aggregate_functions() @@ -231,7 +236,7 @@ unsafe extern "C" fn aggregate_functions_fn_wrapper( unsafe extern "C" fn window_functions_fn_wrapper( session: &FFI_SessionRef, -) -> RHashMap { +) -> StabbyVec<(StabbyString, FFI_WindowUDF)> { let session = session.inner(); session .window_functions() @@ -240,29 +245,49 @@ unsafe extern "C" fn window_functions_fn_wrapper( .collect() } -fn table_options_to_rhash(options: &TableOptions) -> RHashMap { - options +fn table_options_to_rhash( + mut options: TableOptions, +) -> StabbyVec<(StabbyString, 
StabbyString)> { + // It is important that we mutate options here and set current format + // to None so that when we call `entries()` we get ALL format entries. + // We will pass current_format as a special case and strip it on the + // other side of the boundary. + let current_format = options.current_format.take(); + let mut options: HashMap = options .entries() .into_iter() .filter_map(|entry| entry.value.map(|v| (entry.key.into(), v.into()))) - .collect() + .collect(); + if let Some(current_format) = current_format { + options.insert( + "datafusion_ffi.table_current_format".into(), + match current_format { + ConfigFileType::JSON => "json", + ConfigFileType::PARQUET => "parquet", + ConfigFileType::CSV => "csv", + } + .into(), + ); + } + + options.into_iter().collect() } unsafe extern "C" fn table_options_fn_wrapper( session: &FFI_SessionRef, -) -> RHashMap { +) -> StabbyVec<(StabbyString, StabbyString)> { let session = session.inner(); let table_options = session.table_options(); - table_options_to_rhash(table_options) + table_options_to_rhash(table_options.clone()) } unsafe extern "C" fn default_table_options_fn_wrapper( session: &FFI_SessionRef, -) -> RHashMap { +) -> StabbyVec<(StabbyString, StabbyString)> { let session = session.inner(); let table_options = session.default_table_options(); - table_options_to_rhash(&table_options) + table_options_to_rhash(table_options) } unsafe extern "C" fn task_ctx_fn_wrapper(session: &FFI_SessionRef) -> FFI_TaskContext { @@ -389,7 +414,7 @@ impl TryFrom<&FFI_SessionRef> for ForeignSession { let udf = >::from(&kv_pair.1); ( - kv_pair.0.into_string(), + kv_pair.0.to_string(), Arc::new(ScalarUDF::new_from_shared_impl(udf)), ) }) @@ -400,7 +425,7 @@ impl TryFrom<&FFI_SessionRef> for ForeignSession { let udaf = >::from(&kv_pair.1); ( - kv_pair.0.into_string(), + kv_pair.0.to_string(), Arc::new(AggregateUDF::new_from_shared_impl(udaf)), ) }) @@ -411,7 +436,7 @@ impl TryFrom<&FFI_SessionRef> for ForeignSession { let udwf = 
>::from(&kv_pair.1); ( - kv_pair.0.into_string(), + kv_pair.0.to_string(), Arc::new(WindowUDF::new_from_shared_impl(udwf)), ) }) @@ -437,16 +462,73 @@ impl Clone for FFI_SessionRef { } } -fn table_options_from_rhashmap(options: RHashMap) -> TableOptions { - let options = options +fn table_options_from_rhashmap( + options: StabbyVec<(StabbyString, StabbyString)>, +) -> TableOptions { + let mut options: HashMap = options .into_iter() - .map(|kv_pair| (kv_pair.0.into_string(), kv_pair.1.into_string())) + .map(|kv_pair| (kv_pair.0.to_string(), kv_pair.1.to_string())) .collect(); + let current_format = options.remove("datafusion_ffi.table_current_format"); + + let mut table_options = TableOptions::default(); + let formats = [ + ConfigFileType::CSV, + ConfigFileType::JSON, + ConfigFileType::PARQUET, + ]; + for format in formats { + // It is imperative that if new enum variants are added below that they be + // included in the formats list above and in the extension check below. + let format_name = match &format { + ConfigFileType::CSV => "csv", + ConfigFileType::PARQUET => "parquet", + ConfigFileType::JSON => "json", + }; + let format_options: HashMap = options + .iter() + .filter_map(|(k, v)| { + let (prefix, key) = k.split_once(".")?; + if prefix == format_name { + Some((format!("format.{key}"), v.to_owned())) + } else { + None + } + }) + .collect(); + if !format_options.is_empty() { + table_options.current_format = Some(format.clone()); + table_options + .alter_with_string_hash_map(&format_options) + .unwrap_or_else(|err| log::warn!("Error parsing table options: {err}")); + } + } - TableOptions::from_string_hash_map(&options).unwrap_or_else(|err| { - log::warn!("Error parsing default table options: {err}"); - TableOptions::default() - }) + let extension_options: HashMap = options + .iter() + .filter_map(|(k, v)| { + let (prefix, _) = k.split_once(".")?; + if !["json", "parquet", "csv"].contains(&prefix) { + Some((k.to_owned(), v.to_owned())) + } else { + None + } + }) 
+ .collect(); + if !extension_options.is_empty() { + table_options + .alter_with_string_hash_map(&extension_options) + .unwrap_or_else(|err| log::warn!("Error parsing table options: {err}")); + } + + table_options.current_format = + current_format.and_then(|format| match format.as_str() { + "csv" => Some(ConfigFileType::CSV), + "parquet" => Some(ConfigFileType::PARQUET), + "json" => Some(ConfigFileType::JSON), + _ => None, + }); + table_options } #[async_trait] @@ -495,7 +577,7 @@ impl Session for ForeignSession { let physical_expr = df_result!((self.session.create_physical_expr)( &self.session, - logical_expr.into(), + logical_expr.into_iter().collect(), schema ))?; @@ -556,6 +638,7 @@ mod tests { use std::sync::Arc; use arrow_schema::{DataType, Field, Schema}; + use datafusion::execution::SessionStateBuilder; use datafusion_common::DataFusionError; use datafusion_expr::col; use datafusion_expr::registry::FunctionRegistry; @@ -566,7 +649,16 @@ mod tests { #[tokio::test] async fn test_ffi_session() -> Result<(), DataFusionError> { let (ctx, task_ctx_provider) = crate::util::tests::test_session_and_ctx(); - let state = ctx.state(); + let mut table_options = TableOptions::default(); + table_options.csv.has_header = Some(true); + table_options.json.schema_infer_max_rec = Some(10); + table_options.parquet.global.coerce_int96 = Some("123456789".into()); + table_options.current_format = Some(ConfigFileType::JSON); + + let state = SessionStateBuilder::new_from_existing(ctx.state()) + .with_table_options(table_options) + .build(); + let logical_codec = FFI_LogicalExtensionCodec::new( Arc::new(DefaultLogicalExtensionCodec {}), None, diff --git a/datafusion/ffi/src/table_provider.rs b/datafusion/ffi/src/table_provider.rs index df8b648026d3e..0f8c8c0dbdcd8 100644 --- a/datafusion/ffi/src/table_provider.rs +++ b/datafusion/ffi/src/table_provider.rs @@ -19,8 +19,6 @@ use std::any::Any; use std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use 
abi_stable::std_types::{ROption, RResult, RVec}; use arrow::datatypes::SchemaRef; use async_ffi::{FfiFuture, FutureExt}; use async_trait::async_trait; @@ -37,16 +35,18 @@ use datafusion_proto::logical_plan::{ }; use datafusion_proto::protobuf::LogicalExprList; use prost::Message; + +use stabby::vec::Vec as SVec; use tokio::runtime::Handle; use super::execution_plan::FFI_ExecutionPlan; -use super::insert_op::FFI_InsertOp; +use super::insert_op::FFiInsertOp; use crate::arrow_wrappers::WrappedSchema; use crate::execution::FFI_TaskContextProvider; use crate::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use crate::session::{FFI_SessionRef, ForeignSession}; -use crate::table_source::{FFI_TableProviderFilterPushDown, FFI_TableType}; -use crate::util::FFIResult; +use crate::table_source::{FFI_TableType, FfiTableProviderFilterPushDown}; +use crate::util::{FFIResult, FfiOption, FfiResult}; use crate::{df_result, rresult_return}; /// A stable struct for sharing [`TableProvider`] across FFI boundaries. @@ -89,7 +89,7 @@ use crate::{df_result, rresult_return}; /// It is important to be careful when expanding these functions to be certain which /// side of the interface each object refers to. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_TableProvider { /// Return the table schema schema: unsafe extern "C" fn(provider: &Self) -> WrappedSchema, @@ -108,9 +108,9 @@ pub struct FFI_TableProvider { scan: unsafe extern "C" fn( provider: &Self, session: FFI_SessionRef, - projections: RVec, - filters_serialized: RVec, - limit: ROption, + projections: FfiOption>, + filters_serialized: SVec, + limit: FfiOption, ) -> FfiFuture>, /// Return the type of table. See [`TableType`] for options. 
@@ -122,15 +122,15 @@ pub struct FFI_TableProvider { supports_filters_pushdown: Option< unsafe extern "C" fn( provider: &FFI_TableProvider, - filters_serialized: RVec, - ) -> FFIResult>, + filters_serialized: SVec, + ) -> FFIResult>, >, insert_into: unsafe extern "C" fn( provider: &Self, session: FFI_SessionRef, input: &FFI_ExecutionPlan, - insert_op: FFI_InsertOp, + insert_op: FFiInsertOp, ) -> FfiFuture>, pub logical_codec: FFI_LogicalExtensionCodec, @@ -190,7 +190,7 @@ fn supports_filters_pushdown_internal( filters_serialized: &[u8], task_ctx: &Arc, codec: &dyn LogicalExtensionCodec, -) -> Result> { +) -> Result> { let filters = match filters_serialized.is_empty() { true => vec![], false => { @@ -202,7 +202,7 @@ fn supports_filters_pushdown_internal( }; let filters_borrowed: Vec<&Expr> = filters.iter().collect(); - let results: RVec<_> = provider + let results: SVec<_> = provider .supports_filters_pushdown(&filters_borrowed)? .iter() .map(|v| v.into()) @@ -213,8 +213,8 @@ fn supports_filters_pushdown_internal( unsafe extern "C" fn supports_filters_pushdown_fn_wrapper( provider: &FFI_TableProvider, - filters_serialized: RVec, -) -> FFIResult> { + filters_serialized: SVec, +) -> FFIResult> { let logical_codec: Arc = (&provider.logical_codec).into(); let task_ctx = rresult_return!(>::try_from( &provider.logical_codec.task_ctx_provider @@ -232,9 +232,9 @@ unsafe extern "C" fn supports_filters_pushdown_fn_wrapper( unsafe extern "C" fn scan_fn_wrapper( provider: &FFI_TableProvider, session: FFI_SessionRef, - projections: RVec, - filters_serialized: RVec, - limit: ROption, + projections: FfiOption>, + filters_serialized: SVec, + limit: FfiOption, ) -> FfiFuture> { let task_ctx: Result, DataFusionError> = (&provider.logical_codec.task_ctx_provider).try_into(); @@ -269,15 +269,16 @@ unsafe extern "C" fn scan_fn_wrapper( } }; - let projections: Vec<_> = projections.into_iter().collect(); + let projections: Option> = + projections.into_option().map(|p| 
p.into_iter().collect()); let plan = rresult_return!( internal_provider - .scan(session, Some(&projections), &filters, limit.into()) + .scan(session, projections.as_ref(), &filters, limit.into()) .await ); - RResult::ROk(FFI_ExecutionPlan::new(plan, runtime.clone())) + FfiResult::Ok(FFI_ExecutionPlan::new(plan, runtime.clone())) } .into_ffi() } @@ -286,7 +287,7 @@ unsafe extern "C" fn insert_into_fn_wrapper( provider: &FFI_TableProvider, session: FFI_SessionRef, input: &FFI_ExecutionPlan, - insert_op: FFI_InsertOp, + insert_op: FFiInsertOp, ) -> FfiFuture> { let runtime = provider.runtime().clone(); let internal_provider = Arc::clone(provider.inner()); @@ -314,7 +315,7 @@ unsafe extern "C" fn insert_into_fn_wrapper( .await ); - RResult::ROk(FFI_ExecutionPlan::new(plan, runtime.clone())) + FfiResult::Ok(FFI_ExecutionPlan::new(plan, runtime.clone())) } .into_ffi() } @@ -390,6 +391,9 @@ impl FFI_TableProvider { runtime: Option, logical_codec: FFI_LogicalExtensionCodec, ) -> Self { + if let Some(provider) = provider.as_any().downcast_ref::() { + return provider.0.clone(); + } let private_data = Box::new(ProviderPrivateData { provider, runtime }); Self { @@ -461,20 +465,21 @@ impl TableProvider for ForeignTableProvider { ) -> Result> { let session = FFI_SessionRef::new(session, None, self.0.logical_codec.clone()); - let projections: Option> = - projection.map(|p| p.iter().map(|v| v.to_owned()).collect()); + let projections: FfiOption> = projection + .map(|p| p.iter().map(|v| v.to_owned()).collect()) + .into(); let codec: Arc = (&self.0.logical_codec).into(); let filter_list = LogicalExprList { expr: serialize_exprs(filters, codec.as_ref())?, }; - let filters_serialized = filter_list.encode_to_vec().into(); + let filters_serialized = filter_list.encode_to_vec().into_iter().collect(); let plan = unsafe { let maybe_plan = (self.0.scan)( &self.0, session, - projections.unwrap_or_default(), + projections, filters_serialized, limit.into(), ) @@ -513,7 +518,10 @@ impl 
TableProvider for ForeignTableProvider { }; let serialized_filters = expr_list.encode_to_vec(); - let pushdowns = df_result!(pushdown_fn(&self.0, serialized_filters.into()))?; + let pushdowns = df_result!(pushdown_fn( + &self.0, + serialized_filters.into_iter().collect() + ))?; Ok(pushdowns.iter().map(|v| v.into()).collect()) } @@ -529,7 +537,7 @@ impl TableProvider for ForeignTableProvider { let rc = Handle::try_current().ok(); let input = FFI_ExecutionPlan::new(input, rc); - let insert_op: FFI_InsertOp = insert_op.into(); + let insert_op: FFiInsertOp = insert_op.into(); let plan = unsafe { let maybe_plan = @@ -658,8 +666,9 @@ mod tests { let provider = Arc::new(MemTable::try_new(schema, vec![vec![batch1]])?); - let ffi_provider = + let mut ffi_provider = FFI_TableProvider::new(provider, true, None, task_ctx_provider, None); + ffi_provider.library_marker_id = crate::mock_foreign_marker_id; let foreign_table_provider: Arc = (&ffi_provider).into(); @@ -712,4 +721,62 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_scan_with_none_projection_returns_all_columns() -> Result<()> { + use arrow::datatypes::Field; + use datafusion::arrow::array::Float32Array; + use datafusion::arrow::datatypes::DataType; + use datafusion::arrow::record_batch::RecordBatch; + use datafusion::datasource::MemTable; + use datafusion::physical_plan::collect; + + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Float32, false), + Field::new("b", DataType::Float32, false), + Field::new("c", DataType::Float32, false), + ])); + + let batch = RecordBatch::try_new( + Arc::clone(&schema), + vec![ + Arc::new(Float32Array::from(vec![1.0, 2.0])), + Arc::new(Float32Array::from(vec![3.0, 4.0])), + Arc::new(Float32Array::from(vec![5.0, 6.0])), + ], + )?; + + let provider = + Arc::new(MemTable::try_new(Arc::clone(&schema), vec![vec![batch]])?); + + let ctx = Arc::new(SessionContext::new()); + let task_ctx_provider = Arc::clone(&ctx) as Arc; + let task_ctx_provider = 
FFI_TaskContextProvider::from(&task_ctx_provider); + + // Wrap in FFI and force the foreign path (not local bypass) + let mut ffi_provider = + FFI_TableProvider::new(provider, true, None, task_ctx_provider, None); + ffi_provider.library_marker_id = crate::mock_foreign_marker_id; + + let foreign_table_provider: Arc = (&ffi_provider).into(); + + // Call scan with projection=None, meaning "return all columns" + let plan = foreign_table_provider + .scan(&ctx.state(), None, &[], None) + .await?; + assert_eq!( + plan.schema().fields().len(), + 3, + "scan(projection=None) should return all columns; got {}", + plan.schema().fields().len() + ); + + // Also verify we can execute and get correct data + let batches = collect(plan, ctx.task_ctx()).await?; + assert_eq!(batches.len(), 1); + assert_eq!(batches[0].num_columns(), 3); + assert_eq!(batches[0].num_rows(), 2); + + Ok(()) + } } diff --git a/datafusion/ffi/src/table_provider_factory.rs b/datafusion/ffi/src/table_provider_factory.rs index 15789eeab0421..2331e44b29b24 100644 --- a/datafusion/ffi/src/table_provider_factory.rs +++ b/datafusion/ffi/src/table_provider_factory.rs @@ -17,10 +17,6 @@ use std::{ffi::c_void, sync::Arc}; -use abi_stable::{ - StableAbi, - std_types::{RResult, RString, RVec}, -}; use async_ffi::{FfiFuture, FutureExt}; use async_trait::async_trait; use datafusion_catalog::{Session, TableProvider, TableProviderFactory}; @@ -32,12 +28,15 @@ use datafusion_proto::logical_plan::{ }; use datafusion_proto::protobuf::LogicalPlanNode; use prost::Message; + +use stabby::vec::Vec as StabbyVec; use tokio::runtime::Handle; use crate::execution::FFI_TaskContextProvider; use crate::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use crate::session::{FFI_SessionRef, ForeignSession}; use crate::table_provider::{FFI_TableProvider, ForeignTableProvider}; +use crate::util::{FFIResult, FfiResult}; use crate::{df_result, rresult_return}; /// A stable struct for sharing [`TableProviderFactory`] across FFI 
boundaries. @@ -49,7 +48,7 @@ use crate::{df_result, rresult_return}; /// /// [`FFI_TableProvider`]: crate::table_provider::FFI_TableProvider #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_TableProviderFactory { /// Create a TableProvider with the given command. /// @@ -62,8 +61,8 @@ pub struct FFI_TableProviderFactory { create: unsafe extern "C" fn( factory: &Self, session: FFI_SessionRef, - cmd_serialized: RVec, - ) -> FfiFuture>, + cmd_serialized: StabbyVec, + ) -> FfiFuture>, logical_codec: FFI_LogicalExtensionCodec, @@ -144,7 +143,7 @@ impl FFI_TableProviderFactory { fn deserialize_cmd( &self, - cmd_serialized: &RVec, + cmd_serialized: &StabbyVec, ) -> Result { let task_ctx: Arc = (&self.logical_codec.task_ctx_provider).try_into()?; @@ -186,15 +185,15 @@ impl From<&FFI_TableProviderFactory> for Arc { unsafe extern "C" fn create_fn_wrapper( factory: &FFI_TableProviderFactory, session: FFI_SessionRef, - cmd_serialized: RVec, -) -> FfiFuture> { + cmd_serialized: StabbyVec, +) -> FfiFuture> { let factory = factory.clone(); async move { let provider = rresult_return!( create_fn_wrapper_impl(factory, session, cmd_serialized).await ); - RResult::ROk(provider) + FfiResult::Ok(provider) } .into_ffi() } @@ -202,7 +201,7 @@ unsafe extern "C" fn create_fn_wrapper( async fn create_fn_wrapper_impl( factory: FFI_TableProviderFactory, session: FFI_SessionRef, - cmd_serialized: RVec, + cmd_serialized: StabbyVec, ) -> Result { let runtime = factory.runtime().clone(); let ffi_logical_codec = factory.logical_codec.clone(); @@ -269,7 +268,7 @@ impl ForeignTableProviderFactory { fn serialize_cmd( &self, cmd: CreateExternalTable, - ) -> Result, DataFusionError> { + ) -> Result, DataFusionError> { let logical_codec: Arc = (&self.0.logical_codec).into(); @@ -280,7 +279,7 @@ impl ForeignTableProviderFactory { let mut buf: Vec = Vec::new(); plan.try_encode(&mut buf)?; - Ok(buf.into()) + Ok(buf.into_iter().collect()) } } diff --git 
a/datafusion/ffi/src/table_source.rs b/datafusion/ffi/src/table_source.rs index 2f17d9235a088..0e63ec59a91d8 100644 --- a/datafusion/ffi/src/table_source.rs +++ b/datafusion/ffi/src/table_source.rs @@ -15,49 +15,47 @@ // specific language governing permissions and limitations // under the License. -use abi_stable::StableAbi; use datafusion_expr::{TableProviderFilterPushDown, TableType}; /// FFI safe version of [`TableProviderFilterPushDown`]. -#[repr(C)] -#[derive(StableAbi)] -pub enum FFI_TableProviderFilterPushDown { +#[repr(u8)] +pub enum FfiTableProviderFilterPushDown { Unsupported, Inexact, Exact, } -impl From<&FFI_TableProviderFilterPushDown> for TableProviderFilterPushDown { - fn from(value: &FFI_TableProviderFilterPushDown) -> Self { +impl From<&FfiTableProviderFilterPushDown> for TableProviderFilterPushDown { + fn from(value: &FfiTableProviderFilterPushDown) -> Self { match value { - FFI_TableProviderFilterPushDown::Unsupported => { + FfiTableProviderFilterPushDown::Unsupported => { TableProviderFilterPushDown::Unsupported } - FFI_TableProviderFilterPushDown::Inexact => { + FfiTableProviderFilterPushDown::Inexact => { TableProviderFilterPushDown::Inexact } - FFI_TableProviderFilterPushDown::Exact => TableProviderFilterPushDown::Exact, + FfiTableProviderFilterPushDown::Exact => TableProviderFilterPushDown::Exact, } } } -impl From<&TableProviderFilterPushDown> for FFI_TableProviderFilterPushDown { +impl From<&TableProviderFilterPushDown> for FfiTableProviderFilterPushDown { fn from(value: &TableProviderFilterPushDown) -> Self { match value { TableProviderFilterPushDown::Unsupported => { - FFI_TableProviderFilterPushDown::Unsupported + FfiTableProviderFilterPushDown::Unsupported } TableProviderFilterPushDown::Inexact => { - FFI_TableProviderFilterPushDown::Inexact + FfiTableProviderFilterPushDown::Inexact } - TableProviderFilterPushDown::Exact => FFI_TableProviderFilterPushDown::Exact, + TableProviderFilterPushDown::Exact => 
FfiTableProviderFilterPushDown::Exact, } } } /// FFI safe version of [`TableType`]. #[repr(C)] -#[derive(Debug, Clone, Copy, PartialEq, Eq, StableAbi)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum FFI_TableType { Base, View, @@ -91,7 +89,7 @@ mod tests { use super::*; fn round_trip_filter_pushdown(pushdown: TableProviderFilterPushDown) -> Result<()> { - let ffi_pushdown: FFI_TableProviderFilterPushDown = (&pushdown).into(); + let ffi_pushdown: FfiTableProviderFilterPushDown = (&pushdown).into(); let round_trip: TableProviderFilterPushDown = (&ffi_pushdown).into(); assert_eq!(pushdown, round_trip); diff --git a/datafusion/ffi/src/tests/mod.rs b/datafusion/ffi/src/tests/mod.rs index cbee5febdb352..329b8b0ab3b25 100644 --- a/datafusion/ffi/src/tests/mod.rs +++ b/datafusion/ffi/src/tests/mod.rs @@ -17,12 +17,6 @@ use std::sync::Arc; -use abi_stable::library::{LibraryError, RootModule}; -use abi_stable::prefix_type::PrefixTypeTrait; -use abi_stable::sabi_types::VersionStrings; -use abi_stable::{ - StableAbi, declare_root_module_statics, export_root_module, package_version_strings, -}; use arrow::array::RecordBatch; use arrow_schema::{DataType, Field, Schema}; use async_provider::create_async_table_provider; @@ -55,8 +49,6 @@ mod udf_udaf_udwf; pub mod utils; #[repr(C)] -#[derive(StableAbi)] -#[sabi(kind(Prefix(prefix_ref = ForeignLibraryModuleRef)))] /// This struct defines the module interfaces. It is to be shared by /// both the module loading program and library that implements the /// module. @@ -101,17 +93,6 @@ pub struct ForeignLibraryModule { pub version: extern "C" fn() -> u64, } -impl RootModule for ForeignLibraryModuleRef { - declare_root_module_statics! 
{ForeignLibraryModuleRef} - const BASE_NAME: &'static str = "datafusion_ffi"; - const NAME: &'static str = "datafusion_ffi"; - const VERSION_STRINGS: VersionStrings = package_version_strings!(); - - fn initialization(self) -> Result { - Ok(self) - } -} - pub fn create_test_schema() -> Arc { Arc::new(Schema::new(vec![ Field::new("a", DataType::Int32, true), @@ -147,9 +128,9 @@ extern "C" fn construct_table_provider_factory( table_provider_factory::create(codec) } -#[export_root_module] /// This defines the entry point for using the module. -pub fn get_foreign_library_module() -> ForeignLibraryModuleRef { +#[unsafe(no_mangle)] +pub extern "C" fn datafusion_ffi_get_module() -> ForeignLibraryModule { ForeignLibraryModule { create_catalog: create_catalog_provider, create_catalog_list: create_catalog_provider_list, @@ -164,5 +145,4 @@ pub fn get_foreign_library_module() -> ForeignLibraryModuleRef { create_extension_options: config::create_extension_options, version: super::version, } - .leak_into_prefix() } diff --git a/datafusion/ffi/src/tests/utils.rs b/datafusion/ffi/src/tests/utils.rs index 9659a51f04b01..e1374c786266b 100644 --- a/datafusion/ffi/src/tests/utils.rs +++ b/datafusion/ffi/src/tests/utils.rs @@ -15,47 +15,63 @@ // specific language governing permissions and limitations // under the License. -use std::path::Path; +use std::path::{Path, PathBuf}; -use abi_stable::library::RootModule; use datafusion_common::{DataFusionError, Result}; -use crate::tests::ForeignLibraryModuleRef; +use crate::tests::ForeignLibraryModule; -/// Compute the path to the library. It would be preferable to simply use -/// abi_stable::library::development_utils::compute_library_path however -/// our current CI pipeline has a `ci` profile that we need to use to -/// find the library. -pub fn compute_library_path( - target_path: &Path, -) -> std::io::Result { +/// Compute the path to the built cdylib. Checks debug, release, and ci profile dirs. 
+fn compute_library_dir(target_path: &Path) -> PathBuf { let debug_dir = target_path.join("debug"); let release_dir = target_path.join("release"); let ci_dir = target_path.join("ci"); - let debug_path = M::get_library_path(&debug_dir.join("deps")); - let release_path = M::get_library_path(&release_dir.join("deps")); - let ci_path = M::get_library_path(&ci_dir.join("deps")); + let all_dirs = vec![debug_dir.clone(), release_dir, ci_dir]; - let all_paths = vec![ - (debug_dir.clone(), debug_path), - (release_dir, release_path), - (ci_dir, ci_path), - ]; - - let best_path = all_paths + all_dirs .into_iter() - .filter(|(_, path)| path.exists()) - .filter_map(|(dir, path)| path.metadata().map(|m| (dir, m)).ok()) - .filter_map(|(dir, meta)| meta.modified().map(|m| (dir, m)).ok()) + .filter(|dir| dir.join("deps").exists()) + .filter_map(|dir| { + dir.join("deps") + .metadata() + .and_then(|m| m.modified()) + .ok() + .map(|date| (dir, date)) + }) .max_by_key(|(_, date)| *date) .map(|(dir, _)| dir) - .unwrap_or(debug_dir); + .unwrap_or(debug_dir) +} + +/// Find the cdylib file for datafusion_ffi in the given directory. +fn find_cdylib(deps_dir: &Path) -> Result { + let lib_prefix = if cfg!(target_os = "windows") { + "" + } else { + "lib" + }; + let lib_ext = if cfg!(target_os = "macos") { + "dylib" + } else if cfg!(target_os = "windows") { + "dll" + } else { + "so" + }; - Ok(best_path) + let pattern = format!("{lib_prefix}datafusion_ffi.{lib_ext}"); + let lib_path = deps_dir.join(&pattern); + + if lib_path.exists() { + return Ok(lib_path); + } + + Err(DataFusionError::External( + format!("Could not find library at {}", lib_path.display()).into(), + )) } -pub fn get_module() -> Result { +pub fn get_module() -> Result { let expected_version = crate::version(); let crate_root = Path::new(env!("CARGO_MANIFEST_DIR")); @@ -66,24 +82,26 @@ pub fn get_module() -> Result { .expect("Failed to find workspace root") .join("target"); - // Find the location of the library. 
This is specific to the build environment, - // so you will need to change the approach here based on your use case. - // let target: &std::path::Path = "../../../../target/".as_ref(); - let library_path = - compute_library_path::(target_dir.as_path()) + let library_dir = compute_library_dir(target_dir.as_path()); + let lib_path = find_cdylib(&library_dir.join("deps"))?; + + // Load the library using libloading + let lib = unsafe { + libloading::Library::new(&lib_path) .map_err(|e| DataFusionError::External(Box::new(e)))? - .join("deps"); - - // Load the module - let module = ForeignLibraryModuleRef::load_from_directory(&library_path) - .map_err(|e| DataFusionError::External(Box::new(e)))?; - - assert_eq!( - module - .version() - .expect("Unable to call version on FFI module")(), - expected_version - ); + }; + + let get_module: libloading::Symbol ForeignLibraryModule> = unsafe { + lib.get(b"datafusion_ffi_get_module") + .map_err(|e| DataFusionError::External(Box::new(e)))? + }; + + let module = get_module(); + + assert_eq!((module.version)(), expected_version); + + // Leak the library to keep it loaded for the duration of the test + std::mem::forget(lib); Ok(module) } diff --git a/datafusion/ffi/src/udaf/accumulator.rs b/datafusion/ffi/src/udaf/accumulator.rs index 6d2b86a3f2e62..14509389e2702 100644 --- a/datafusion/ffi/src/udaf/accumulator.rs +++ b/datafusion/ffi/src/udaf/accumulator.rs @@ -19,8 +19,6 @@ use std::ffi::c_void; use std::ops::Deref; use std::ptr::null_mut; -use abi_stable::StableAbi; -use abi_stable::std_types::{RResult, RVec}; use arrow::array::ArrayRef; use arrow::error::ArrowError; use datafusion_common::error::{DataFusionError, Result}; @@ -28,36 +26,41 @@ use datafusion_common::scalar::ScalarValue; use datafusion_expr::Accumulator; use prost::Message; +use stabby::vec::Vec as StabbyVec; + use crate::arrow_wrappers::WrappedArray; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiResult}; use crate::{df_result, rresult, 
rresult_return}; /// A stable struct for sharing [`Accumulator`] across FFI boundaries. /// For an explanation of each field, see the corresponding function /// defined in [`Accumulator`]. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_Accumulator { pub update_batch: unsafe extern "C" fn( accumulator: &mut Self, - values: RVec, + values: StabbyVec, ) -> FFIResult<()>, // Evaluate and return a ScalarValues as protobuf bytes - pub evaluate: unsafe extern "C" fn(accumulator: &mut Self) -> FFIResult>, + pub evaluate: + unsafe extern "C" fn(accumulator: &mut Self) -> FFIResult>, pub size: unsafe extern "C" fn(accumulator: &Self) -> usize, - pub state: unsafe extern "C" fn(accumulator: &mut Self) -> FFIResult>>, + pub state: unsafe extern "C" fn( + accumulator: &mut Self, + ) -> FFIResult>>, pub merge_batch: unsafe extern "C" fn( accumulator: &mut Self, - states: RVec, + states: StabbyVec, ) -> FFIResult<()>, pub retract_batch: unsafe extern "C" fn( accumulator: &mut Self, - values: RVec, + values: StabbyVec, ) -> FFIResult<()>, pub supports_retract_batch: bool, @@ -102,7 +105,7 @@ impl FFI_Accumulator { unsafe extern "C" fn update_batch_fn_wrapper( accumulator: &mut FFI_Accumulator, - values: RVec, + values: StabbyVec, ) -> FFIResult<()> { unsafe { let accumulator = accumulator.inner_mut(); @@ -119,7 +122,7 @@ unsafe extern "C" fn update_batch_fn_wrapper( unsafe extern "C" fn evaluate_fn_wrapper( accumulator: &mut FFI_Accumulator, -) -> FFIResult> { +) -> FFIResult> { unsafe { let accumulator = accumulator.inner_mut(); @@ -127,7 +130,7 @@ unsafe extern "C" fn evaluate_fn_wrapper( let proto_result: datafusion_proto::protobuf::ScalarValue = rresult_return!((&scalar_result).try_into()); - RResult::ROk(proto_result.encode_to_vec().into()) + FfiResult::Ok(proto_result.encode_to_vec().into_iter().collect()) } } @@ -137,7 +140,7 @@ unsafe extern "C" fn size_fn_wrapper(accumulator: &FFI_Accumulator) -> usize { unsafe extern "C" fn state_fn_wrapper( 
accumulator: &mut FFI_Accumulator, -) -> FFIResult>> { +) -> FFIResult>> { unsafe { let accumulator = accumulator.inner_mut(); @@ -147,10 +150,10 @@ unsafe extern "C" fn state_fn_wrapper( .map(|state_val| { datafusion_proto::protobuf::ScalarValue::try_from(&state_val) .map_err(DataFusionError::from) - .map(|v| RVec::from(v.encode_to_vec())) + .map(|v| v.encode_to_vec().into_iter().collect::>()) }) .collect::>>() - .map(|state_vec| state_vec.into()); + .map(|state_vec| state_vec.into_iter().collect()); rresult!(state) } @@ -158,7 +161,7 @@ unsafe extern "C" fn state_fn_wrapper( unsafe extern "C" fn merge_batch_fn_wrapper( accumulator: &mut FFI_Accumulator, - states: RVec, + states: StabbyVec, ) -> FFIResult<()> { unsafe { let accumulator = accumulator.inner_mut(); @@ -176,7 +179,7 @@ unsafe extern "C" fn merge_batch_fn_wrapper( unsafe extern "C" fn retract_batch_fn_wrapper( accumulator: &mut FFI_Accumulator, - values: RVec, + values: StabbyVec, ) -> FFIResult<()> { unsafe { let accumulator = accumulator.inner_mut(); @@ -265,7 +268,7 @@ impl Accumulator for ForeignAccumulator { .collect::, ArrowError>>()?; df_result!((self.accumulator.update_batch)( &mut self.accumulator, - values.into() + values.into_iter().collect() )) } } @@ -314,7 +317,7 @@ impl Accumulator for ForeignAccumulator { .collect::, ArrowError>>()?; df_result!((self.accumulator.merge_batch)( &mut self.accumulator, - states.into() + states.into_iter().collect() )) } } @@ -327,7 +330,7 @@ impl Accumulator for ForeignAccumulator { .collect::, ArrowError>>()?; df_result!((self.accumulator.retract_batch)( &mut self.accumulator, - values.into() + values.into_iter().collect() )) } } diff --git a/datafusion/ffi/src/udaf/accumulator_args.rs b/datafusion/ffi/src/udaf/accumulator_args.rs index a3359231073c4..3abd1e57a0839 100644 --- a/datafusion/ffi/src/udaf/accumulator_args.rs +++ b/datafusion/ffi/src/udaf/accumulator_args.rs @@ -17,14 +17,14 @@ use std::sync::Arc; -use abi_stable::StableAbi; -use 
abi_stable::std_types::{RString, RVec}; use arrow::datatypes::Schema; use arrow::ffi::FFI_ArrowSchema; use arrow_schema::FieldRef; use datafusion_common::error::DataFusionError; use datafusion_expr::function::AccumulatorArgs; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; use crate::arrow_wrappers::WrappedSchema; use crate::physical_expr::FFI_PhysicalExpr; @@ -35,17 +35,17 @@ use crate::util::{rvec_wrapped_to_vec_fieldref, vec_fieldref_to_rvec_wrapped}; /// For an explanation of each field, see the corresponding field /// defined in [`AccumulatorArgs`]. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_AccumulatorArgs { return_field: WrappedSchema, schema: WrappedSchema, ignore_nulls: bool, - order_bys: RVec, + order_bys: StabbyVec, is_reversed: bool, - name: RString, + name: StabbyString, is_distinct: bool, - exprs: RVec, - expr_fields: RVec, + exprs: StabbyVec, + expr_fields: StabbyVec, } impl TryFrom> for FFI_AccumulatorArgs { @@ -55,7 +55,7 @@ impl TryFrom> for FFI_AccumulatorArgs { WrappedSchema(FFI_ArrowSchema::try_from(args.return_field.as_ref())?); let schema = WrappedSchema(FFI_ArrowSchema::try_from(args.schema)?); - let order_bys: RVec<_> = args + let order_bys: StabbyVec<_> = args .order_bys .iter() .map(FFI_PhysicalSortExpr::from) diff --git a/datafusion/ffi/src/udaf/groups_accumulator.rs b/datafusion/ffi/src/udaf/groups_accumulator.rs index fc4ce4b8ba9d0..1addface85ffa 100644 --- a/datafusion/ffi/src/udaf/groups_accumulator.rs +++ b/datafusion/ffi/src/udaf/groups_accumulator.rs @@ -20,29 +20,29 @@ use std::ops::Deref; use std::ptr::null_mut; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RVec}; use arrow::array::{Array, ArrayRef, BooleanArray}; use arrow::error::ArrowError; use arrow::ffi::to_ffi; use datafusion_common::error::{DataFusionError, Result}; use datafusion_expr::{EmitTo, 
GroupsAccumulator}; +use stabby::vec::Vec as StabbyVec; + use crate::arrow_wrappers::{WrappedArray, WrappedSchema}; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiOption}; use crate::{df_result, rresult, rresult_return}; /// A stable struct for sharing [`GroupsAccumulator`] across FFI boundaries. /// For an explanation of each field, see the corresponding function /// defined in [`GroupsAccumulator`]. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_GroupsAccumulator { pub update_batch: unsafe extern "C" fn( accumulator: &mut Self, - values: RVec, - group_indices: RVec, - opt_filter: ROption, + values: StabbyVec, + group_indices: StabbyVec, + opt_filter: FfiOption, total_num_groups: usize, ) -> FFIResult<()>, @@ -57,21 +57,21 @@ pub struct FFI_GroupsAccumulator { pub state: unsafe extern "C" fn( accumulator: &mut Self, emit_to: FFI_EmitTo, - ) -> FFIResult>, + ) -> FFIResult>, pub merge_batch: unsafe extern "C" fn( accumulator: &mut Self, - values: RVec, - group_indices: RVec, - opt_filter: ROption, + values: StabbyVec, + group_indices: StabbyVec, + opt_filter: FfiOption, total_num_groups: usize, ) -> FFIResult<()>, pub convert_to_state: unsafe extern "C" fn( accumulator: &Self, - values: RVec, - opt_filter: ROption, - ) -> FFIResult>, + values: StabbyVec, + opt_filter: FfiOption, + ) -> FFIResult>, pub supports_convert_to_state: bool, @@ -110,7 +110,7 @@ impl FFI_GroupsAccumulator { } } -fn process_values(values: RVec) -> Result>> { +fn process_values(values: StabbyVec) -> Result>> { values .into_iter() .map(|v| v.try_into().map_err(DataFusionError::from)) @@ -118,7 +118,9 @@ fn process_values(values: RVec) -> Result>> { } /// Convert C-typed opt_filter into the internal type. 
-fn process_opt_filter(opt_filter: ROption) -> Result> { +fn process_opt_filter( + opt_filter: FfiOption, +) -> Result> { opt_filter .into_option() .map(|filter| { @@ -131,9 +133,9 @@ fn process_opt_filter(opt_filter: ROption) -> Result, - group_indices: RVec, - opt_filter: ROption, + values: StabbyVec, + group_indices: StabbyVec, + opt_filter: FfiOption, total_num_groups: usize, ) -> FFIResult<()> { unsafe { @@ -174,7 +176,7 @@ unsafe extern "C" fn size_fn_wrapper(accumulator: &FFI_GroupsAccumulator) -> usi unsafe extern "C" fn state_fn_wrapper( accumulator: &mut FFI_GroupsAccumulator, emit_to: FFI_EmitTo, -) -> FFIResult> { +) -> FFIResult> { unsafe { let accumulator = accumulator.inner_mut(); @@ -183,16 +185,16 @@ unsafe extern "C" fn state_fn_wrapper( state .into_iter() .map(|arr| WrappedArray::try_from(&arr).map_err(DataFusionError::from)) - .collect::>>() + .collect::>>() ) } } unsafe extern "C" fn merge_batch_fn_wrapper( accumulator: &mut FFI_GroupsAccumulator, - values: RVec, - group_indices: RVec, - opt_filter: ROption, + values: StabbyVec, + group_indices: StabbyVec, + opt_filter: FfiOption, total_num_groups: usize, ) -> FFIResult<()> { unsafe { @@ -212,9 +214,9 @@ unsafe extern "C" fn merge_batch_fn_wrapper( unsafe extern "C" fn convert_to_state_fn_wrapper( accumulator: &FFI_GroupsAccumulator, - values: RVec, - opt_filter: ROption, -) -> FFIResult> { + values: StabbyVec, + opt_filter: FfiOption, +) -> FFIResult> { unsafe { let accumulator = accumulator.inner(); let values = rresult_return!(process_values(values)); @@ -226,7 +228,7 @@ unsafe extern "C" fn convert_to_state_fn_wrapper( state .iter() .map(|arr| WrappedArray::try_from(arr).map_err(DataFusionError::from)) - .collect::>>() + .collect::>>() ) } } @@ -326,7 +328,7 @@ impl GroupsAccumulator for ForeignGroupsAccumulator { df_result!((self.accumulator.update_batch)( &mut self.accumulator, - values.into(), + values.into_iter().collect(), group_indices, opt_filter, total_num_groups @@ -389,7 +391,7 @@ 
impl GroupsAccumulator for ForeignGroupsAccumulator { df_result!((self.accumulator.merge_batch)( &mut self.accumulator, - values.into(), + values.into_iter().collect(), group_indices, opt_filter, total_num_groups @@ -406,7 +408,7 @@ impl GroupsAccumulator for ForeignGroupsAccumulator { let values = values .iter() .map(WrappedArray::try_from) - .collect::, ArrowError>>()?; + .collect::, ArrowError>>()?; let opt_filter = opt_filter .map(|bool_array| to_ffi(&bool_array.to_data())) @@ -436,7 +438,7 @@ impl GroupsAccumulator for ForeignGroupsAccumulator { } #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub enum FFI_EmitTo { All, First(usize), diff --git a/datafusion/ffi/src/udaf/mod.rs b/datafusion/ffi/src/udaf/mod.rs index 22cbe8cff0fe6..47ec70e14eb8d 100644 --- a/datafusion/ffi/src/udaf/mod.rs +++ b/datafusion/ffi/src/udaf/mod.rs @@ -15,12 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::ffi::c_void; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; - -use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RResult, RStr, RString, RVec}; use accumulator::FFI_Accumulator; use accumulator_args::{FFI_AccumulatorArgs, ForeignAccumulatorArgs}; use arrow::datatypes::{DataType, Field}; @@ -40,12 +34,20 @@ use datafusion_proto_common::from_proto::parse_proto_fields_to_fields; use groups_accumulator::FFI_GroupsAccumulator; use prost::{DecodeError, Message}; +use stabby::str::Str as StabbyStr; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; +use std::ffi::c_void; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; + use crate::arrow_wrappers::WrappedSchema; use crate::util::{ - FFIResult, rvec_wrapped_to_vec_datatype, rvec_wrapped_to_vec_fieldref, - vec_datatype_to_rvec_wrapped, vec_fieldref_to_rvec_wrapped, + FFIResult, FfiOption, FfiResult, rvec_wrapped_to_vec_datatype, + rvec_wrapped_to_vec_fieldref, vec_datatype_to_rvec_wrapped, + vec_fieldref_to_rvec_wrapped, }; 
-use crate::volatility::FFI_Volatility; +use crate::volatility::FfiVolatility; use crate::{df_result, rresult, rresult_return}; mod accumulator; @@ -54,22 +56,22 @@ mod groups_accumulator; /// A stable struct for sharing a [`AggregateUDF`] across FFI boundaries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_AggregateUDF { /// FFI equivalent to the `name` of a [`AggregateUDF`] - pub name: RString, + pub name: StabbyString, /// FFI equivalent to the `aliases` of a [`AggregateUDF`] - pub aliases: RVec, + pub aliases: StabbyVec, /// FFI equivalent to the `volatility` of a [`AggregateUDF`] - pub volatility: FFI_Volatility, + pub volatility: FfiVolatility, /// Determines the return field of the underlying [`AggregateUDF`] based on the /// argument fields. pub return_field: unsafe extern "C" fn( udaf: &Self, - arg_fields: RVec, + arg_fields: StabbyVec, ) -> FFIResult, /// FFI equivalent to the `is_nullable` of a [`AggregateUDF`] @@ -95,12 +97,12 @@ pub struct FFI_AggregateUDF { /// FFI equivalent to [`AggregateUDF::state_fields`] pub state_fields: unsafe extern "C" fn( udaf: &FFI_AggregateUDF, - name: &RStr, - input_fields: RVec, + name: &StabbyStr, + input_fields: StabbyVec, return_field: WrappedSchema, - ordering_fields: RVec>, + ordering_fields: StabbyVec>, is_distinct: bool, - ) -> FFIResult>>, + ) -> FFIResult>>, /// FFI equivalent to [`AggregateUDF::create_groups_accumulator`] pub create_groups_accumulator: @@ -114,7 +116,7 @@ pub struct FFI_AggregateUDF { unsafe extern "C" fn( udaf: &FFI_AggregateUDF, beneficial_ordering: bool, - ) -> FFIResult>, + ) -> FFIResult>, /// FFI equivalent to [`AggregateUDF::order_sensitivity`] pub order_sensitivity: @@ -126,8 +128,8 @@ pub struct FFI_AggregateUDF { /// appropriate calls on the underlying [`AggregateUDF`] pub coerce_types: unsafe extern "C" fn( udf: &Self, - arg_types: RVec, - ) -> FFIResult>, + arg_types: StabbyVec, + ) -> FFIResult>, /// Used to create a clone on the provider of the udaf. 
This should /// only need to be called by the receiver of the udaf. @@ -164,7 +166,7 @@ impl FFI_AggregateUDF { unsafe extern "C" fn return_field_fn_wrapper( udaf: &FFI_AggregateUDF, - arg_fields: RVec, + arg_fields: StabbyVec, ) -> FFIResult { unsafe { let udaf = udaf.inner(); @@ -249,7 +251,7 @@ unsafe extern "C" fn groups_accumulator_supported_fn_wrapper( unsafe extern "C" fn with_beneficial_ordering_fn_wrapper( udaf: &FFI_AggregateUDF, beneficial_ordering: bool, -) -> FFIResult> { +) -> FFIResult> { unsafe { let udaf = udaf.inner().as_ref().clone(); @@ -262,18 +264,18 @@ unsafe extern "C" fn with_beneficial_ordering_fn_wrapper( .flatten() .map(|func| FFI_AggregateUDF::from(Arc::new(func))); - RResult::ROk(result.into()) + FfiResult::Ok(result.into()) } } unsafe extern "C" fn state_fields_fn_wrapper( udaf: &FFI_AggregateUDF, - name: &RStr, - input_fields: RVec, + name: &StabbyStr, + input_fields: StabbyVec, return_field: WrappedSchema, - ordering_fields: RVec>, + ordering_fields: StabbyVec>, is_distinct: bool, -) -> FFIResult>> { +) -> FFIResult>> { unsafe { let udaf = udaf.inner(); @@ -313,10 +315,10 @@ unsafe extern "C" fn state_fields_fn_wrapper( .collect::>>() ) .into_iter() - .map(|field| field.encode_to_vec().into()) + .map(|field| field.encode_to_vec().into_iter().collect()) .collect(); - RResult::ROk(state_fields) + FfiResult::Ok(state_fields) } } @@ -328,8 +330,8 @@ unsafe extern "C" fn order_sensitivity_fn_wrapper( unsafe extern "C" fn coerce_types_fn_wrapper( udaf: &FFI_AggregateUDF, - arg_types: RVec, -) -> FFIResult> { + arg_types: StabbyVec, +) -> FFIResult> { unsafe { let udaf = udaf.inner(); @@ -497,7 +499,7 @@ impl AggregateUDFImpl for ForeignAggregateUDF { fn state_fields(&self, args: StateFieldsArgs) -> Result> { unsafe { - let name = RStr::from_str(args.name); + let name = StabbyStr::from(args.name); let input_fields = vec_fieldref_to_rvec_wrapped(args.input_fields)?; let return_field = 
WrappedSchema(FFI_ArrowSchema::try_from(args.return_field.as_ref())?); @@ -509,7 +511,7 @@ impl AggregateUDFImpl for ForeignAggregateUDF { .map(|v| v.map_err(DataFusionError::from)) .collect::>>()? .into_iter() - .map(|proto_field| proto_field.encode_to_vec().into()) + .map(|proto_field| proto_field.encode_to_vec().into_iter().collect()) .collect(); let fields = df_result!((self.udaf.state_fields)( @@ -609,7 +611,7 @@ impl AggregateUDFImpl for ForeignAggregateUDF { } #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub enum FFI_AggregateOrderSensitivity { Insensitive, HardRequirement, diff --git a/datafusion/ffi/src/udf/mod.rs b/datafusion/ffi/src/udf/mod.rs index 94be5f38eab0b..6a3636b8b6519 100644 --- a/datafusion/ffi/src/udf/mod.rs +++ b/datafusion/ffi/src/udf/mod.rs @@ -19,9 +19,7 @@ use std::ffi::c_void; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{RResult, RString, RVec}; -use arrow::array::ArrayRef; +use arrow::array::Array; use arrow::datatypes::{DataType, Field}; use arrow::error::ArrowError; use arrow::ffi::{FFI_ArrowSchema, from_ffi, to_ffi}; @@ -37,27 +35,32 @@ use return_type_args::{ FFI_ReturnFieldArgs, ForeignReturnFieldArgs, ForeignReturnFieldArgsOwned, }; +use stabby::string::String as SString; +use stabby::vec::Vec as SVec; + use crate::arrow_wrappers::{WrappedArray, WrappedSchema}; +use crate::config::FFI_ConfigOptions; +use crate::expr::columnar_value::FFI_ColumnarValue; use crate::util::{ FFIResult, rvec_wrapped_to_vec_datatype, vec_datatype_to_rvec_wrapped, }; -use crate::volatility::FFI_Volatility; +use crate::volatility::FfiVolatility; use crate::{df_result, rresult, rresult_return}; pub mod return_type_args; /// A stable struct for sharing a [`ScalarUDF`] across FFI boundaries. 
#[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_ScalarUDF { /// FFI equivalent to the `name` of a [`ScalarUDF`] - pub name: RString, + pub name: SString, /// FFI equivalent to the `aliases` of a [`ScalarUDF`] - pub aliases: RVec, + pub aliases: SVec, /// FFI equivalent to the `volatility` of a [`ScalarUDF`] - pub volatility: FFI_Volatility, + pub volatility: FfiVolatility, /// Determines the return info of the underlying [`ScalarUDF`]. pub return_field_from_args: unsafe extern "C" fn( @@ -69,11 +72,12 @@ pub struct FFI_ScalarUDF { /// within an AbiStable wrapper. pub invoke_with_args: unsafe extern "C" fn( udf: &Self, - args: RVec, - arg_fields: RVec, + args: SVec, + arg_fields: SVec, num_rows: usize, return_field: WrappedSchema, - ) -> FFIResult, + config_options: FFI_ConfigOptions, + ) -> FFIResult, /// See [`ScalarUDFImpl`] for details on short_circuits pub short_circuits: bool, @@ -84,8 +88,8 @@ pub struct FFI_ScalarUDF { /// appropriate calls on the underlying [`ScalarUDF`] pub coerce_types: unsafe extern "C" fn( udf: &Self, - arg_types: RVec, - ) -> FFIResult>, + arg_types: SVec, + ) -> FFIResult>, /// Used to create a clone on the provider of the udf. This should /// only need to be called by the receiver of the udf. 
@@ -136,8 +140,8 @@ unsafe extern "C" fn return_field_from_args_fn_wrapper( unsafe extern "C" fn coerce_types_fn_wrapper( udf: &FFI_ScalarUDF, - arg_types: RVec, -) -> FFIResult> { + arg_types: SVec, +) -> FFIResult> { let arg_types = rresult_return!(rvec_wrapped_to_vec_datatype(&arg_types)); let arg_fields = arg_types @@ -155,11 +159,12 @@ unsafe extern "C" fn coerce_types_fn_wrapper( unsafe extern "C" fn invoke_with_args_fn_wrapper( udf: &FFI_ScalarUDF, - args: RVec, - arg_fields: RVec, + args: SVec, + arg_fields: SVec, number_rows: usize, return_field: WrappedSchema, -) -> FFIResult { + config_options: FFI_ConfigOptions, +) -> FFIResult { unsafe { let args = args .into_iter() @@ -181,28 +186,22 @@ unsafe extern "C" fn invoke_with_args_fn_wrapper( }) .collect::>>(); let arg_fields = rresult_return!(arg_fields); + let config_options = rresult_return!(ConfigOptions::try_from(config_options)); + let config_options = Arc::new(config_options); let args = ScalarFunctionArgs { args, arg_fields, number_rows, return_field, - // TODO: pass config options: https://github.com/apache/datafusion/issues/17035 - config_options: Arc::new(ConfigOptions::default()), + config_options, }; - let result = rresult_return!( + rresult!( udf.inner() .invoke_with_args(args) - .and_then(|r| r.to_array(number_rows)) - ); - - let (result_array, result_schema) = rresult_return!(to_ffi(&result.to_data())); - - RResult::ROk(WrappedArray { - array: result_array, - schema: WrappedSchema(result_schema), - }) + .and_then(FFI_ColumnarValue::try_from) + ) } } @@ -232,6 +231,10 @@ impl Clone for FFI_ScalarUDF { impl From> for FFI_ScalarUDF { fn from(udf: Arc) -> Self { + if let Some(udf) = udf.inner().as_any().downcast_ref::() { + return udf.udf.clone(); + } + let name = udf.name().into(); let aliases = udf.aliases().iter().map(|a| a.to_owned().into()).collect(); let volatility = udf.signature().volatility.into(); @@ -314,7 +317,7 @@ impl From<&FFI_ScalarUDF> for Arc { if (udf.library_marker_id)() == 
crate::get_library_marker_id() { Arc::clone(udf.inner().inner()) } else { - let name = udf.name.to_owned().into(); + let name = udf.name.to_string(); let signature = Signature::user_defined((&udf.volatility).into()); let aliases = udf.aliases.iter().map(|s| s.to_string()).collect(); @@ -366,8 +369,7 @@ impl ScalarUDFImpl for ForeignScalarUDF { arg_fields, number_rows, return_field, - // TODO: pass config options: https://github.com/apache/datafusion/issues/17035 - config_options: _config_options, + config_options, } = invoke_args; let args = args @@ -382,7 +384,8 @@ impl ScalarUDFImpl for ForeignScalarUDF { }) }) .collect::, ArrowError>>()? - .into(); + .into_iter() + .collect(); let arg_fields_wrapped = arg_fields .iter() @@ -392,10 +395,11 @@ impl ScalarUDFImpl for ForeignScalarUDF { let arg_fields = arg_fields_wrapped .into_iter() .map(WrappedSchema) - .collect::>(); + .collect::>(); let return_field = return_field.as_ref().clone(); let return_field = WrappedSchema(FFI_ArrowSchema::try_from(return_field)?); + let config_options = config_options.as_ref().into(); let result = unsafe { (self.udf.invoke_with_args)( @@ -404,13 +408,12 @@ impl ScalarUDFImpl for ForeignScalarUDF { arg_fields, number_rows, return_field, + config_options, ) }; let result = df_result!(result)?; - let result_array: ArrayRef = result.try_into()?; - - Ok(ColumnarValue::Array(result_array)) + result.try_into() } fn aliases(&self) -> &[String] { diff --git a/datafusion/ffi/src/udf/return_type_args.rs b/datafusion/ffi/src/udf/return_type_args.rs index 8fb015b7ed922..d21d7f4176fdc 100644 --- a/datafusion/ffi/src/udf/return_type_args.rs +++ b/datafusion/ffi/src/udf/return_type_args.rs @@ -15,23 +15,25 @@ // specific language governing permissions and limitations // under the License. 
-use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RVec}; use arrow_schema::FieldRef; use datafusion_common::scalar::ScalarValue; use datafusion_common::{DataFusionError, ffi_datafusion_err}; use datafusion_expr::ReturnFieldArgs; use prost::Message; +use stabby::vec::Vec as StabbyVec; + use crate::arrow_wrappers::WrappedSchema; -use crate::util::{rvec_wrapped_to_vec_fieldref, vec_fieldref_to_rvec_wrapped}; +use crate::util::{ + FfiOption, rvec_wrapped_to_vec_fieldref, vec_fieldref_to_rvec_wrapped, +}; /// A stable struct for sharing a [`ReturnFieldArgs`] across FFI boundaries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_ReturnFieldArgs { - arg_fields: RVec, - scalar_arguments: RVec>>, + arg_fields: StabbyVec, + scalar_arguments: StabbyVec>>, } impl TryFrom> for FFI_ReturnFieldArgs { @@ -47,13 +49,15 @@ impl TryFrom> for FFI_ReturnFieldArgs { .map(|arg| { let proto_value: datafusion_proto::protobuf::ScalarValue = arg.try_into()?; - let proto_bytes: RVec = proto_value.encode_to_vec().into(); + let proto_bytes: StabbyVec = + proto_value.encode_to_vec().into_iter().collect(); Ok(proto_bytes) }) .transpose() }) .collect(); - let scalar_arguments = scalar_arguments?.into_iter().map(ROption::from).collect(); + let scalar_arguments = + scalar_arguments?.into_iter().map(FfiOption::from).collect(); Ok(Self { arg_fields, @@ -91,7 +95,7 @@ impl TryFrom<&FFI_ReturnFieldArgs> for ForeignReturnFieldArgsOwned { let scalar_value: ScalarValue = (&proto_value).try_into()?; Ok(scalar_value) }); - Option::from(maybe_arg).transpose() + maybe_arg.transpose() }) .collect(); let scalar_arguments = scalar_arguments?.into_iter().collect(); diff --git a/datafusion/ffi/src/udtf.rs b/datafusion/ffi/src/udtf.rs index 6024ec755de58..bdcc5cbb9977f 100644 --- a/datafusion/ffi/src/udtf.rs +++ b/datafusion/ffi/src/udtf.rs @@ -18,8 +18,6 @@ use std::ffi::c_void; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{RResult, RVec}; use 
datafusion_catalog::{TableFunctionImpl, TableProvider}; use datafusion_common::error::Result; use datafusion_execution::TaskContext; @@ -31,22 +29,26 @@ use datafusion_proto::logical_plan::{ }; use datafusion_proto::protobuf::LogicalExprList; use prost::Message; + +use stabby::vec::Vec as StabbyVec; use tokio::runtime::Handle; use crate::execution::FFI_TaskContextProvider; use crate::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use crate::table_provider::FFI_TableProvider; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiResult}; use crate::{df_result, rresult_return}; /// A stable struct for sharing a [`TableFunctionImpl`] across FFI boundaries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_TableFunction { /// Equivalent to the `call` function of the TableFunctionImpl. /// The arguments are Expr passed as protobuf encoded bytes. - pub call: - unsafe extern "C" fn(udtf: &Self, args: RVec) -> FFIResult, + pub call: unsafe extern "C" fn( + udtf: &Self, + args: StabbyVec, + ) -> FFIResult, pub logical_codec: FFI_LogicalExtensionCodec, @@ -89,7 +91,7 @@ impl FFI_TableFunction { unsafe extern "C" fn call_fn_wrapper( udtf: &FFI_TableFunction, - args: RVec, + args: StabbyVec, ) -> FFIResult { let runtime = udtf.runtime(); let udtf_inner = udtf.inner(); @@ -107,7 +109,7 @@ unsafe extern "C" fn call_fn_wrapper( )); let table_provider = rresult_return!(udtf_inner.call(&args)); - RResult::ROk(FFI_TableProvider::new_with_ffi_codec( + FfiResult::Ok(FFI_TableProvider::new_with_ffi_codec( table_provider, false, runtime, @@ -213,7 +215,7 @@ impl TableFunctionImpl for ForeignTableFunction { let expr_list = LogicalExprList { expr: serialize_exprs(args, codec.as_ref())?, }; - let filters_serialized = expr_list.encode_to_vec().into(); + let filters_serialized = expr_list.encode_to_vec().into_iter().collect(); let table_provider = unsafe { (self.0.call)(&self.0, filters_serialized) }; diff --git a/datafusion/ffi/src/udwf/mod.rs 
b/datafusion/ffi/src/udwf/mod.rs index dbac00fd43020..36a505a911755 100644 --- a/datafusion/ffi/src/udwf/mod.rs +++ b/datafusion/ffi/src/udwf/mod.rs @@ -19,8 +19,6 @@ use std::ffi::c_void; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::{ROption, RResult, RString, RVec}; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Schema, SchemaRef}; use arrow_schema::{Field, FieldRef}; @@ -36,30 +34,34 @@ use partition_evaluator_args::{ FFI_PartitionEvaluatorArgs, ForeignPartitionEvaluatorArgs, }; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; + mod partition_evaluator; mod partition_evaluator_args; mod range; use crate::arrow_wrappers::WrappedSchema; use crate::util::{ - FFIResult, rvec_wrapped_to_vec_datatype, rvec_wrapped_to_vec_fieldref, - vec_datatype_to_rvec_wrapped, vec_fieldref_to_rvec_wrapped, + FFIResult, FfiOption, FfiResult, rvec_wrapped_to_vec_datatype, + rvec_wrapped_to_vec_fieldref, vec_datatype_to_rvec_wrapped, + vec_fieldref_to_rvec_wrapped, }; -use crate::volatility::FFI_Volatility; +use crate::volatility::FfiVolatility; use crate::{df_result, rresult, rresult_return}; /// A stable struct for sharing a [`WindowUDF`] across FFI boundaries. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_WindowUDF { /// FFI equivalent to the `name` of a [`WindowUDF`] - pub name: RString, + pub name: StabbyString, /// FFI equivalent to the `aliases` of a [`WindowUDF`] - pub aliases: RVec, + pub aliases: StabbyVec, /// FFI equivalent to the `volatility` of a [`WindowUDF`] - pub volatility: FFI_Volatility, + pub volatility: FfiVolatility, pub partition_evaluator: unsafe extern "C" fn( udwf: &Self, @@ -69,8 +71,8 @@ pub struct FFI_WindowUDF { pub field: unsafe extern "C" fn( udwf: &Self, - input_types: RVec, - display_name: RString, + input_types: StabbyVec, + display_name: StabbyString, ) -> FFIResult, /// Performs type coercion. 
To simply this interface, all UDFs are treated as having @@ -79,10 +81,10 @@ pub struct FFI_WindowUDF { /// appropriate calls on the underlying [`WindowUDF`] pub coerce_types: unsafe extern "C" fn( udf: &Self, - arg_types: RVec, - ) -> FFIResult>, + arg_types: StabbyVec, + ) -> FFIResult>, - pub sort_options: ROption, + pub sort_options: FfiOption, /// Used to create a clone on the provider of the udf. This should /// only need to be called by the receiver of the udf. @@ -129,14 +131,14 @@ unsafe extern "C" fn partition_evaluator_fn_wrapper( let evaluator = rresult_return!(inner.partition_evaluator_factory((&args).into())); - RResult::ROk(evaluator.into()) + FfiResult::Ok(evaluator.into()) } } unsafe extern "C" fn field_fn_wrapper( udwf: &FFI_WindowUDF, - input_fields: RVec, - display_name: RString, + input_fields: StabbyVec, + display_name: StabbyString, ) -> FFIResult { unsafe { let inner = udwf.inner(); @@ -150,14 +152,14 @@ unsafe extern "C" fn field_fn_wrapper( let schema = Arc::new(Schema::new(vec![field])); - RResult::ROk(WrappedSchema::from(schema)) + FfiResult::Ok(WrappedSchema::from(schema)) } } unsafe extern "C" fn coerce_types_fn_wrapper( udwf: &FFI_WindowUDF, - arg_types: RVec, -) -> FFIResult> { + arg_types: StabbyVec, +) -> FFIResult> { unsafe { let inner = udwf.inner(); @@ -358,7 +360,7 @@ impl WindowUDFImpl for ForeignWindowUDF { } fn sort_options(&self) -> Option { - let options: Option<&FFI_SortOptions> = self.udf.sort_options.as_ref().into(); + let options: Option<&FFI_SortOptions> = self.udf.sort_options.as_ref(); options.map(|s| s.into()) } @@ -368,7 +370,7 @@ impl WindowUDFImpl for ForeignWindowUDF { } #[repr(C)] -#[derive(Debug, StableAbi, Clone)] +#[derive(Debug, Clone)] pub struct FFI_SortOptions { pub descending: bool, pub nulls_first: bool, diff --git a/datafusion/ffi/src/udwf/partition_evaluator.rs b/datafusion/ffi/src/udwf/partition_evaluator.rs index 8df02511aa4b3..0fd3032d84134 100644 --- 
a/datafusion/ffi/src/udwf/partition_evaluator.rs +++ b/datafusion/ffi/src/udwf/partition_evaluator.rs @@ -18,8 +18,6 @@ use std::ffi::c_void; use std::ops::Range; -use abi_stable::StableAbi; -use abi_stable::std_types::{RResult, RVec}; use arrow::array::ArrayRef; use arrow::error::ArrowError; use datafusion_common::scalar::ScalarValue; @@ -28,33 +26,35 @@ use datafusion_expr::PartitionEvaluator; use datafusion_expr::window_state::WindowAggState; use prost::Message; +use stabby::vec::Vec as StabbyVec; + use super::range::FFI_Range; use crate::arrow_wrappers::WrappedArray; -use crate::util::FFIResult; +use crate::util::{FFIResult, FfiResult}; use crate::{df_result, rresult, rresult_return}; /// A stable struct for sharing [`PartitionEvaluator`] across FFI boundaries. /// For an explanation of each field, see the corresponding function /// defined in [`PartitionEvaluator`]. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_PartitionEvaluator { pub evaluate_all: unsafe extern "C" fn( evaluator: &mut Self, - values: RVec, + values: StabbyVec, num_rows: usize, ) -> FFIResult, pub evaluate: unsafe extern "C" fn( evaluator: &mut Self, - values: RVec, + values: StabbyVec, range: FFI_Range, - ) -> FFIResult>, + ) -> FFIResult>, pub evaluate_all_with_rank: unsafe extern "C" fn( evaluator: &Self, num_rows: usize, - ranks_in_partition: RVec, + ranks_in_partition: StabbyVec, ) -> FFIResult, pub get_range: unsafe extern "C" fn( @@ -107,7 +107,7 @@ impl FFI_PartitionEvaluator { unsafe extern "C" fn evaluate_all_fn_wrapper( evaluator: &mut FFI_PartitionEvaluator, - values: RVec, + values: StabbyVec, num_rows: usize, ) -> FFIResult { unsafe { @@ -132,9 +132,9 @@ unsafe extern "C" fn evaluate_all_fn_wrapper( unsafe extern "C" fn evaluate_fn_wrapper( evaluator: &mut FFI_PartitionEvaluator, - values: RVec, + values: StabbyVec, range: FFI_Range, -) -> FFIResult> { +) -> FFIResult> { unsafe { let inner = evaluator.inner_mut(); @@ -151,14 +151,14 @@ unsafe extern 
"C" fn evaluate_fn_wrapper( let proto_result: datafusion_proto::protobuf::ScalarValue = rresult_return!((&scalar_result).try_into()); - RResult::ROk(proto_result.encode_to_vec().into()) + FfiResult::Ok(proto_result.encode_to_vec().into_iter().collect()) } } unsafe extern "C" fn evaluate_all_with_rank_fn_wrapper( evaluator: &FFI_PartitionEvaluator, num_rows: usize, - ranks_in_partition: RVec, + ranks_in_partition: StabbyVec, ) -> FFIResult { unsafe { let inner = evaluator.inner(); @@ -284,7 +284,7 @@ impl PartitionEvaluator for ForeignPartitionEvaluator { let values = values .iter() .map(WrappedArray::try_from) - .collect::, ArrowError>>()?; + .collect::, ArrowError>>()?; (self.evaluator.evaluate_all)(&mut self.evaluator, values, num_rows) }; @@ -302,7 +302,7 @@ impl PartitionEvaluator for ForeignPartitionEvaluator { let values = values .iter() .map(WrappedArray::try_from) - .collect::, ArrowError>>()?; + .collect::, ArrowError>>()?; let scalar_bytes = df_result!((self.evaluator.evaluate)( &mut self.evaluator, diff --git a/datafusion/ffi/src/udwf/partition_evaluator_args.rs b/datafusion/ffi/src/udwf/partition_evaluator_args.rs index ffad1f41ee42d..2a30373632784 100644 --- a/datafusion/ffi/src/udwf/partition_evaluator_args.rs +++ b/datafusion/ffi/src/udwf/partition_evaluator_args.rs @@ -17,14 +17,13 @@ use std::sync::Arc; -use abi_stable::StableAbi; -use abi_stable::std_types::RVec; use arrow::error::ArrowError; use arrow::ffi::FFI_ArrowSchema; use arrow_schema::FieldRef; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::function::PartitionEvaluatorArgs; use datafusion_physical_plan::PhysicalExpr; +use stabby::vec::Vec as StabbyVec; use crate::arrow_wrappers::WrappedSchema; use crate::physical_expr::FFI_PhysicalExpr; @@ -34,10 +33,10 @@ use crate::util::rvec_wrapped_to_vec_fieldref; /// For an explanation of each field, see the corresponding function /// defined in [`PartitionEvaluatorArgs`]. 
#[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_PartitionEvaluatorArgs { - input_exprs: RVec, - input_fields: RVec, + input_exprs: StabbyVec, + input_fields: StabbyVec, is_reversed: bool, ignore_nulls: bool, } @@ -58,7 +57,8 @@ impl TryFrom> for FFI_PartitionEvaluatorArgs { .iter() .map(|input_type| FFI_ArrowSchema::try_from(input_type).map(WrappedSchema)) .collect::, ArrowError>>()? - .into(); + .into_iter() + .collect(); Ok(Self { input_exprs, diff --git a/datafusion/ffi/src/udwf/range.rs b/datafusion/ffi/src/udwf/range.rs index 19a908c5e2454..558fd058a67cf 100644 --- a/datafusion/ffi/src/udwf/range.rs +++ b/datafusion/ffi/src/udwf/range.rs @@ -17,13 +17,11 @@ use std::ops::Range; -use abi_stable::StableAbi; - /// A stable struct for sharing [`Range`] across FFI boundaries. /// For an explanation of each field, see the corresponding function /// defined in [`Range`]. #[repr(C)] -#[derive(Debug, StableAbi)] +#[derive(Debug)] pub struct FFI_Range { pub start: usize, pub end: usize, diff --git a/datafusion/ffi/src/util.rs b/datafusion/ffi/src/util.rs index db6eb0552d2aa..964cd66b834e6 100644 --- a/datafusion/ffi/src/util.rs +++ b/datafusion/ffi/src/util.rs @@ -17,49 +17,53 @@ use std::sync::Arc; -use abi_stable::std_types::{RResult, RString, RVec}; use arrow::datatypes::{DataType, Field}; use arrow::ffi::FFI_ArrowSchema; use arrow_schema::FieldRef; +use stabby::string::String as StabbyString; +use stabby::vec::Vec as StabbyVec; use crate::arrow_wrappers::WrappedSchema; +// Re-export for convenience +pub use crate::ffi_option::{FfiOption, FfiResult}; + /// Convenience type for results passed through the FFI boundary. Since the /// `DataFusionError` enum is complex and little value is gained from creating /// a FFI safe variant of it, we convert errors to strings when passing results /// back. These are converted back and forth using the `df_result`, `rresult`, /// and `rresult_return` macros. 
-pub type FFIResult = RResult; +pub type FFIResult = FfiResult; -/// This macro is a helpful conversion utility to convert from an abi_stable::RResult to a +/// This macro is a helpful conversion utility to convert from an FFIResult to a /// DataFusion result. #[macro_export] macro_rules! df_result { ( $x:expr ) => { - match $x { - abi_stable::std_types::RResult::ROk(v) => Ok(v), - abi_stable::std_types::RResult::RErr(err) => { + match Into::<::std::result::Result<_, _>>::into($x) { + Ok(v) => Ok(v), + Err(err) => { datafusion_common::ffi_err!("{err}") } } }; } -/// This macro is a helpful conversion utility to convert from a DataFusion Result to an abi_stable::RResult +/// This macro is a helpful conversion utility to convert from a DataFusion Result to an FFIResult. #[macro_export] macro_rules! rresult { ( $x:expr ) => { match $x { - Ok(v) => abi_stable::std_types::RResult::ROk(v), - Err(e) => abi_stable::std_types::RResult::RErr( - abi_stable::std_types::RString::from(e.to_string()), - ), + Ok(v) => $crate::ffi_option::FfiResult::Ok(v), + Err(e) => $crate::ffi_option::FfiResult::Err(stabby::string::String::from( + e.to_string().as_str(), + )), } }; } -/// This macro is a helpful conversion utility to convert from a DataFusion Result to an abi_stable::RResult -/// and to also call return when it is an error. Since you cannot use `?` on an RResult, this is designed +/// This macro is a helpful conversion utility to convert from a DataFusion Result to an FFIResult +/// and to also call return when it is an error. Since you cannot use `?` on an FFIResult, this is designed /// to mimic the pattern. #[macro_export] macro_rules! rresult_return { @@ -67,9 +71,9 @@ macro_rules! 
rresult_return { match $x { Ok(v) => v, Err(e) => { - return abi_stable::std_types::RResult::RErr( - abi_stable::std_types::RString::from(e.to_string()), - ) + return $crate::ffi_option::FfiResult::Err(stabby::string::String::from( + e.to_string().as_str(), + )) } } }; @@ -79,7 +83,7 @@ macro_rules! rresult_return { /// FFI friendly counterpart, [`WrappedSchema`] pub fn vec_fieldref_to_rvec_wrapped( fields: &[FieldRef], -) -> Result, arrow::error::ArrowError> { +) -> Result, arrow::error::ArrowError> { Ok(fields .iter() .map(FFI_ArrowSchema::try_from) @@ -92,7 +96,7 @@ pub fn vec_fieldref_to_rvec_wrapped( /// This is a utility function to convert an FFI friendly vector of [`WrappedSchema`] /// to their equivalent [`Field`]. pub fn rvec_wrapped_to_vec_fieldref( - fields: &RVec, + fields: &StabbyVec, ) -> Result, arrow::error::ArrowError> { fields .iter() @@ -104,7 +108,7 @@ pub fn rvec_wrapped_to_vec_fieldref( /// FFI friendly counterpart, [`WrappedSchema`] pub fn vec_datatype_to_rvec_wrapped( data_types: &[DataType], -) -> Result, arrow::error::ArrowError> { +) -> Result, arrow::error::ArrowError> { Ok(data_types .iter() .map(FFI_ArrowSchema::try_from) @@ -117,7 +121,7 @@ pub fn vec_datatype_to_rvec_wrapped( /// This is a utility function to convert an FFI friendly vector of [`WrappedSchema`] /// to their equivalent [`DataType`]. 
pub fn rvec_wrapped_to_vec_datatype( - data_types: &RVec, + data_types: &StabbyVec, ) -> Result, arrow::error::ArrowError> { data_types .iter() @@ -129,12 +133,13 @@ pub fn rvec_wrapped_to_vec_datatype( pub(crate) mod tests { use std::sync::Arc; - use abi_stable::std_types::{RResult, RString}; use datafusion::error::DataFusionError; use datafusion::prelude::SessionContext; use datafusion_execution::TaskContextProvider; + use stabby::string::String as StabbyString; use crate::execution::FFI_TaskContextProvider; + use crate::ffi_option::FfiResult; use crate::util::FFIResult; pub(crate) fn test_session_and_ctx() -> (Arc, FFI_TaskContextProvider) @@ -147,7 +152,7 @@ pub(crate) mod tests { } fn wrap_result(result: Result) -> FFIResult { - RResult::ROk(rresult_return!(result)) + FfiResult::Ok(rresult_return!(result)) } #[test] @@ -155,14 +160,14 @@ pub(crate) mod tests { const VALID_VALUE: &str = "valid_value"; const ERROR_VALUE: &str = "error_value"; - let ok_r_result: FFIResult = - RResult::ROk(VALID_VALUE.to_string().into()); - let err_r_result: FFIResult = - RResult::RErr(ERROR_VALUE.to_string().into()); + let ok_r_result: FFIResult = + FfiResult::Ok(StabbyString::from(VALID_VALUE)); + let err_r_result: FFIResult = + FfiResult::Err(StabbyString::from(ERROR_VALUE)); let returned_ok_result = df_result!(ok_r_result); assert!(returned_ok_result.is_ok()); - assert!(returned_ok_result.unwrap().to_string() == VALID_VALUE); + assert!(*returned_ok_result.unwrap() == *VALID_VALUE); let returned_err_result = df_result!(err_r_result); assert!(returned_err_result.is_err()); @@ -176,13 +181,16 @@ pub(crate) mod tests { datafusion_common::ffi_err!("{ERROR_VALUE}"); let returned_ok_r_result = wrap_result(ok_result); - assert!(returned_ok_r_result == RResult::ROk(VALID_VALUE.into())); + let std_result: Result = returned_ok_r_result.into(); + assert!(std_result == Ok(VALID_VALUE.into())); let returned_err_r_result = wrap_result(err_result); - assert!(returned_err_r_result.is_err()); + 
let std_result: Result = returned_err_r_result.into(); + assert!(std_result.is_err()); assert!( - returned_err_r_result + std_result .unwrap_err() + .as_str() .starts_with(format!("FFI error: {ERROR_VALUE}").as_str()) ); } diff --git a/datafusion/ffi/src/volatility.rs b/datafusion/ffi/src/volatility.rs index bc714ae59587d..c0edd4814aaf7 100644 --- a/datafusion/ffi/src/volatility.rs +++ b/datafusion/ffi/src/volatility.rs @@ -15,18 +15,17 @@ // specific language governing permissions and limitations // under the License. -use abi_stable::StableAbi; use datafusion_expr::Volatility; -#[repr(C)] -#[derive(Debug, StableAbi, Clone)] -pub enum FFI_Volatility { +#[repr(u8)] +#[derive(Debug, Clone)] +pub enum FfiVolatility { Immutable, Stable, Volatile, } -impl From for FFI_Volatility { +impl From for FfiVolatility { fn from(value: Volatility) -> Self { match value { Volatility::Immutable => Self::Immutable, @@ -36,12 +35,12 @@ impl From for FFI_Volatility { } } -impl From<&FFI_Volatility> for Volatility { - fn from(value: &FFI_Volatility) -> Self { +impl From<&FfiVolatility> for Volatility { + fn from(value: &FfiVolatility) -> Self { match value { - FFI_Volatility::Immutable => Self::Immutable, - FFI_Volatility::Stable => Self::Stable, - FFI_Volatility::Volatile => Self::Volatile, + FfiVolatility::Immutable => Self::Immutable, + FfiVolatility::Stable => Self::Stable, + FfiVolatility::Volatile => Self::Volatile, } } } @@ -50,10 +49,10 @@ impl From<&FFI_Volatility> for Volatility { mod tests { use datafusion::logical_expr::Volatility; - use super::FFI_Volatility; + use super::FfiVolatility; fn test_round_trip_volatility(volatility: Volatility) { - let ffi_volatility: FFI_Volatility = volatility.into(); + let ffi_volatility: FfiVolatility = volatility.into(); let round_trip: Volatility = (&ffi_volatility).into(); assert_eq!(volatility, round_trip); diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 
ace69de66f5c3..76cf786c954dc 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -352,8 +352,6 @@ fn array_has_dispatch_for_scalar( haystack: ArrayWrapper<'_>, needle: &dyn Datum, ) -> Result { - let values = haystack.values(); - let is_nested = values.data_type().is_nested(); // If first argument is empty list (second argument is non-null), return false // i.e. array_has([], non-null element) -> false if haystack.len() == 0 { @@ -362,7 +360,17 @@ fn array_has_dispatch_for_scalar( None, ))); } - let eq_array = compare_with_eq(values, needle, is_nested)?; + + // For sliced ListArrays, values() returns the full underlying array but + // only elements between the first and last offset are visible. + let offsets: Vec = haystack.offsets().collect(); + let first_offset = offsets[0]; + let visible_values = haystack + .values() + .slice(first_offset, offsets[offsets.len() - 1] - first_offset); + + let is_nested = visible_values.data_type().is_nested(); + let eq_array = compare_with_eq(&visible_values, needle, is_nested)?; // When a haystack element is null, `eq()` returns null (not false). // In Arrow, a null BooleanArray entry has validity=0 but an @@ -382,10 +390,14 @@ fn array_has_dispatch_for_scalar( ArrayWrapper::LargeList(arr) => arr.nulls(), }; let mut matches = eq_bits.set_indices().peekable(); - let mut values = BooleanBufferBuilder::new(haystack.len()); - values.append_n(haystack.len(), false); + let mut result = BooleanBufferBuilder::new(haystack.len()); + result.append_n(haystack.len(), false); + + // Match positions are relative to visible_values (0-based), so + // subtract first_offset from each offset when comparing. + for (i, window) in offsets.windows(2).enumerate() { + let end = window[1] - first_offset; - for (i, (_start, end)) in haystack.offsets().tuple_windows().enumerate() { let has_match = matches.peek().is_some_and(|&p| p < end); // Advance past all match positions in this row's range. 
@@ -394,14 +406,14 @@ fn array_has_dispatch_for_scalar( } if has_match && validity.is_none_or(|v| v.is_valid(i)) { - values.set_bit(i, true); + result.set_bit(i, true); } } // A null haystack row always produces a null output, so we can // reuse the haystack's null buffer directly. Ok(Arc::new(BooleanArray::new( - values.finish(), + result.finish(), validity.cloned(), ))) } @@ -1066,6 +1078,52 @@ mod tests { Ok(()) } + #[test] + fn test_array_has_sliced_list() -> Result<(), DataFusionError> { + // [[10, 20], [30, 40], [50, 60], [70, 80]] → slice(1,2) → [[30, 40], [50, 60]] + let list = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(10), Some(20)]), + Some(vec![Some(30), Some(40)]), + Some(vec![Some(50), Some(60)]), + Some(vec![Some(70), Some(80)]), + ]); + let sliced = list.slice(1, 2); + let haystack_field = + Arc::new(Field::new("haystack", sliced.data_type().clone(), true)); + let needle_field = Arc::new(Field::new("needle", DataType::Int32, true)); + let return_field = Arc::new(Field::new("return", DataType::Boolean, true)); + + // Search for elements that exist only in sliced-away rows: + // 10 is in the prefix row, 70 is in the suffix row. + let invoke = |needle: i32| -> Result { + ArrayHas::new() + .invoke_with_args(ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(Arc::new(sliced.clone())), + ColumnarValue::Scalar(ScalarValue::Int32(Some(needle))), + ], + arg_fields: vec![ + Arc::clone(&haystack_field), + Arc::clone(&needle_field), + ], + number_rows: 2, + return_field: Arc::clone(&return_field), + config_options: Arc::new(ConfigOptions::default()), + })? 
+ .into_array(2) + }; + + let output = invoke(10)?.as_boolean().clone(); + assert!(!output.value(0)); + assert!(!output.value(1)); + + let output = invoke(70)?.as_boolean().clone(); + assert!(!output.value(0)); + assert!(!output.value(1)); + + Ok(()) + } + #[test] fn test_array_has_list_null_haystack() -> Result<(), DataFusionError> { let haystack_field = Arc::new(Field::new("haystack", DataType::Null, true)); diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index ba16d08538c6d..0214b1552bc9c 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -230,26 +230,36 @@ fn array_position_scalar( "array_position", &[list_array.values(), element_array], )?; - let element_datum = Scalar::new(Arc::clone(element_array)); - - let offsets = list_array.offsets(); - let validity = list_array.nulls(); if list_array.len() == 0 { return Ok(Arc::new(UInt64Array::new_null(0))); } + let element_datum = Scalar::new(Arc::clone(element_array)); + let validity = list_array.nulls(); + + // Only compare the visible portion of the values buffer, which avoids + // wasted work for sliced ListArrays. + let offsets = list_array.offsets(); + let first_offset = offsets[0].as_usize(); + let last_offset = offsets[list_array.len()].as_usize(); + let visible_values = list_array + .values() + .slice(first_offset, last_offset - first_offset); + // `not_distinct` treats NULL=NULL as true, matching the semantics of // `array_position` - let eq_array = arrow_ord::cmp::not_distinct(list_array.values(), &element_datum)?; + let eq_array = arrow_ord::cmp::not_distinct(&visible_values, &element_datum)?; let eq_bits = eq_array.values(); let mut result: Vec> = Vec::with_capacity(list_array.len()); let mut matches = eq_bits.set_indices().peekable(); + // Match positions are relative to visible_values (0-based), so + // subtract first_offset from each offset when comparing. 
for i in 0..list_array.len() { - let start = offsets[i].as_usize(); - let end = offsets[i + 1].as_usize(); + let start = offsets[i].as_usize() - first_offset; + let end = offsets[i + 1].as_usize() - first_offset; if validity.is_some_and(|v| v.is_null(i)) { // Null row -> null output; advance past matches in range @@ -474,3 +484,60 @@ fn general_positions( ListArray::from_iter_primitive::(data), )) } + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::AsArray; + use arrow::datatypes::Int32Type; + use datafusion_common::config::ConfigOptions; + use datafusion_expr::ScalarFunctionArgs; + + #[test] + fn test_array_position_sliced_list() -> Result<()> { + // [[10, 20], [30, 40], [50, 60], [70, 80]] → slice(1,2) → [[30, 40], [50, 60]] + let list = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(10), Some(20)]), + Some(vec![Some(30), Some(40)]), + Some(vec![Some(50), Some(60)]), + Some(vec![Some(70), Some(80)]), + ]); + let sliced = list.slice(1, 2); + let haystack_field = + Arc::new(Field::new("haystack", sliced.data_type().clone(), true)); + let needle_field = Arc::new(Field::new("needle", DataType::Int32, true)); + let return_field = Arc::new(Field::new("return", UInt64, true)); + + // Search for elements that exist only in sliced-away rows: + // 10 is in the prefix row, 70 is in the suffix row. + let invoke = |needle: i32| -> Result { + ArrayPosition::new() + .invoke_with_args(ScalarFunctionArgs { + args: vec![ + ColumnarValue::Array(Arc::new(sliced.clone())), + ColumnarValue::Scalar(ScalarValue::Int32(Some(needle))), + ], + arg_fields: vec![ + Arc::clone(&haystack_field), + Arc::clone(&needle_field), + ], + number_rows: 2, + return_field: Arc::clone(&return_field), + config_options: Arc::new(ConfigOptions::default()), + })? 
+ .into_array(2) + }; + + let output = invoke(10)?; + let output = output.as_primitive::(); + assert!(output.is_null(0)); + assert!(output.is_null(1)); + + let output = invoke(70)?; + let output = output.as_primitive::(); + assert!(output.is_null(0)); + assert!(output.is_null(1)); + + Ok(()) + } +} diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 72eae396e68a6..02628b405ec6c 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -715,7 +715,7 @@ impl BinaryExpr { StringConcat => concat_elements(&left, &right), AtArrow | ArrowAt | Arrow | LongArrow | HashArrow | HashLongArrow | AtAt | HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe - | IntegerDivide => { + | IntegerDivide | Colon => { not_impl_err!( "Binary operator '{:?}' is not supported in the physical expr", self.op diff --git a/datafusion/physical-plan/src/joins/hash_join/stream.rs b/datafusion/physical-plan/src/joins/hash_join/stream.rs index 8af26c1b8a055..b31982ea3b7b4 100644 --- a/datafusion/physical-plan/src/joins/hash_join/stream.rs +++ b/datafusion/physical-plan/src/joins/hash_join/stream.rs @@ -713,6 +713,7 @@ impl HashJoinStream { filter, JoinSide::Left, None, + self.join_type, )? 
} else { (left_indices, right_indices) @@ -781,6 +782,7 @@ impl HashJoinStream { &right_indices, &self.column_indices, join_side, + self.join_type, )?; let push_status = self.output_buffer.push_batch(batch)?; @@ -899,6 +901,7 @@ impl HashJoinStream { &right_side, &self.column_indices, JoinSide::Left, + self.join_type, )?; let push_status = self.output_buffer.push_batch(batch)?; diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 29917dd2cca22..7407b05ea5695 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -941,6 +941,7 @@ pub(crate) fn build_side_determined_results( &probe_indices, column_indices, build_hash_joiner.build_side, + join_type, ) .map(|batch| (batch.num_rows() > 0).then_some(batch)) } else { @@ -1004,6 +1005,7 @@ pub(crate) fn join_with_probe_batch( filter, build_hash_joiner.build_side, None, + join_type, )? 
} else { (build_indices, probe_indices) @@ -1042,6 +1044,7 @@ pub(crate) fn join_with_probe_batch( &probe_indices, column_indices, build_hash_joiner.build_side, + join_type, ) .map(|batch| (batch.num_rows() > 0).then_some(batch)) } diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 34993fcdbddc9..cf4bf2cd163fd 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -910,6 +910,7 @@ pub(crate) fn get_final_indices_from_bit_map( (left_indices, right_indices) } +#[expect(clippy::too_many_arguments)] pub(crate) fn apply_join_filter_to_indices( build_input_buffer: &RecordBatch, probe_batch: &RecordBatch, @@ -918,6 +919,7 @@ pub(crate) fn apply_join_filter_to_indices( filter: &JoinFilter, build_side: JoinSide, max_intermediate_size: Option, + join_type: JoinType, ) -> Result<(UInt64Array, UInt32Array)> { if build_indices.is_empty() && probe_indices.is_empty() { return Ok((build_indices, probe_indices)); @@ -938,6 +940,7 @@ pub(crate) fn apply_join_filter_to_indices( &probe_indices.slice(i, len), filter.column_indices(), build_side, + join_type, )?; let filter_result = filter .expression() @@ -959,6 +962,7 @@ pub(crate) fn apply_join_filter_to_indices( &probe_indices, filter.column_indices(), build_side, + join_type, )?; filter @@ -990,6 +994,7 @@ fn new_empty_schema_batch(schema: &Schema, row_count: usize) -> Result Result { if schema.fields().is_empty() { - return new_empty_schema_batch(schema, build_indices.len()); + // For RightAnti and RightSemi joins, after `adjust_indices_by_join_type` + // the build_indices were untouched so only probe_indices hold the actual + // row count. 
+ let row_count = match join_type { + JoinType::RightAnti | JoinType::RightSemi => probe_indices.len(), + _ => build_indices.len(), + }; + return new_empty_schema_batch(schema, row_count); } // build the columns of the new [RecordBatch]: diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index b3ea548d53750..5b64f0b2a6186 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -730,37 +730,27 @@ impl ExternalSorter { // Sort the batch immediately and get all output batches let sorted_batches = sort_batch_chunked(&batch, &expressions, batch_size)?; - // Free the old reservation and grow it to match the actual sorted output size - reservation.free(); + // Resize the reservation to match the actual sorted output size. + // Using try_resize avoids a release-then-reacquire cycle, which + // matters for MemoryPool implementations where grow/shrink have + // non-trivial cost (e.g. JNI calls in Comet). 
+ let total_sorted_size: usize = sorted_batches + .iter() + .map(get_record_batch_memory_size) + .sum(); + reservation + .try_resize(total_sorted_size) + .map_err(Self::err_with_oom_context)?; - Result::<_, DataFusionError>::Ok((schema, sorted_batches, reservation)) - }) - .then({ - move |batches| async move { - match batches { - Ok((schema, sorted_batches, reservation)) => { - // Calculate the total size of sorted batches - let total_sorted_size: usize = sorted_batches - .iter() - .map(get_record_batch_memory_size) - .sum(); - reservation - .try_grow(total_sorted_size) - .map_err(Self::err_with_oom_context)?; - - // Wrap in ReservationStream to hold the reservation - Ok(Box::pin(ReservationStream::new( - Arc::clone(&schema), - Box::pin(RecordBatchStreamAdapter::new( - schema, - futures::stream::iter(sorted_batches.into_iter().map(Ok)), - )), - reservation, - )) as SendableRecordBatchStream) - } - Err(e) => Err(e), - } - } + // Wrap in ReservationStream to hold the reservation + Result::<_, DataFusionError>::Ok(Box::pin(ReservationStream::new( + Arc::clone(&schema), + Box::pin(RecordBatchStreamAdapter::new( + Arc::clone(&schema), + futures::stream::iter(sorted_batches.into_iter().map(Ok)), + )), + reservation, + )) as SendableRecordBatchStream) }) .try_flatten() .map(move |batch| match batch { diff --git a/datafusion/sql/src/expr/binary_op.rs b/datafusion/sql/src/expr/binary_op.rs index edad5bbc6daad..4e9025e02e0c7 100644 --- a/datafusion/sql/src/expr/binary_op.rs +++ b/datafusion/sql/src/expr/binary_op.rs @@ -22,7 +22,7 @@ use sqlparser::ast::BinaryOperator; impl SqlToRel<'_, S> { pub(crate) fn parse_sql_binary_op(&self, op: &BinaryOperator) -> Result { - match *op { + match op { BinaryOperator::Gt => Ok(Operator::Gt), BinaryOperator::GtEq => Ok(Operator::GtEq), BinaryOperator::Lt => Ok(Operator::Lt), @@ -68,6 +68,7 @@ impl SqlToRel<'_, S> { BinaryOperator::Question => Ok(Operator::Question), BinaryOperator::QuestionAnd => Ok(Operator::QuestionAnd), 
BinaryOperator::QuestionPipe => Ok(Operator::QuestionPipe), + BinaryOperator::Custom(s) if s == ":" => Ok(Operator::Colon), _ => not_impl_err!("Unsupported binary operator: {:?}", op), } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 9aa5be8131dcb..7902eed1e6922 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -22,8 +22,8 @@ use datafusion_expr::planner::{ use sqlparser::ast::{ AccessExpr, BinaryOperator, CastFormat, CastKind, CeilFloorKind, DataType as SQLDataType, DateTimeField, DictionaryField, Expr as SQLExpr, - ExprWithAlias as SQLExprWithAlias, MapEntry, StructField, Subscript, TrimWhereField, - TypedString, Value, ValueWithSpan, + ExprWithAlias as SQLExprWithAlias, JsonPath, MapEntry, StructField, Subscript, + TrimWhereField, TypedString, Value, ValueWithSpan, }; use datafusion_common::{ @@ -651,10 +651,36 @@ impl SqlToRel<'_, S> { options: Box::new(WildcardOptions::default()), }), SQLExpr::Tuple(values) => self.parse_tuple(schema, planner_context, values), + SQLExpr::JsonAccess { value, path } => { + self.parse_json_access(schema, planner_context, value, &path) + } _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } + fn parse_json_access( + &self, + schema: &DFSchema, + planner_context: &mut PlannerContext, + value: Box, + path: &JsonPath, + ) -> Result { + let json_path = path.to_string(); + let json_path = if let Some(json_path) = json_path.strip_prefix(":") { + // sqlparser's JsonPath display adds an extra `:` at the beginning. + json_path.to_owned() + } else { + json_path + }; + self.build_logical_expr( + BinaryOperator::Custom(":".to_owned()), + self.sql_to_expr(*value, schema, planner_context)?, + // pass json path as a string literal, let the impl parse it when needed. + Expr::Literal(ScalarValue::Utf8(Some(json_path)), None), + schema, + ) + } + /// Parses a struct(..) 
expression and plans it creation fn parse_struct( &self, diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 59a9207b51ef0..b82ab24adef71 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1094,6 +1094,7 @@ impl Unparser<'_> { Operator::Question => Ok(BinaryOperator::Question), Operator::QuestionAnd => Ok(BinaryOperator::QuestionAnd), Operator::QuestionPipe => Ok(BinaryOperator::QuestionPipe), + Operator::Colon => Ok(BinaryOperator::Custom(":".to_owned())), } } diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 4717b843abb53..670046f164ed3 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -2821,3 +2821,39 @@ fn test_struct_expr3() { @r#"SELECT test.c1."metadata".product."name" FROM (SELECT {"metadata": {product: {"name": 'Product Name'}}} AS c1) AS test"# ); } + +#[test] +fn test_json_access_1() { + let statement = generate_round_trip_statement( + GenericDialect {}, + r#"SELECT j1_string:field FROM j1"#, + ); + assert_snapshot!( + statement, + @r#"SELECT (j1.j1_string : 'field') FROM j1"# + ); +} + +#[test] +fn test_json_access_2() { + let statement = generate_round_trip_statement( + GenericDialect {}, + r#"SELECT j1_string:field[0] FROM j1"#, + ); + assert_snapshot!( + statement, + @r#"SELECT (j1.j1_string : 'field[0]') FROM j1"# + ); +} + +#[test] +fn test_json_access_3() { + let statement = generate_round_trip_statement( + GenericDialect {}, + r#"SELECT j1_string:field.inner1['inner2'] FROM j1"#, + ); + assert_snapshot!( + statement, + @r#"SELECT (j1.j1_string : 'field.inner1[''inner2'']') FROM j1"# + ); +} diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 282d7c374bdda..228918c3855f2 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -5318,3 +5318,46 
@@ DROP TABLE issue_20437_small; statement count 0 DROP TABLE issue_20437_large; + +# Test count(*) with right semi/anti joins returns correct row counts +# issue: https://github.com/apache/datafusion/issues/20669 + +statement ok +CREATE TABLE t1 (k INT, v INT); + +statement ok +CREATE TABLE t2 (k INT, v INT); + +statement ok +INSERT INTO t1 SELECT i AS k, i AS v FROM generate_series(1, 100) t(i); + +statement ok +INSERT INTO t2 VALUES (1, 1); + +query I +WITH t AS ( + SELECT * + FROM t1 + LEFT ANTI JOIN t2 ON t1.k = t2.k +) +SELECT count(*) +FROM t; +---- +99 + +query I +WITH t AS ( + SELECT * + FROM t1 + LEFT SEMI JOIN t2 ON t1.k = t2.k +) +SELECT count(*) +FROM t; +---- +1 + +statement count 0 +DROP TABLE t1; + +statement count 0 +DROP TABLE t2; diff --git a/datafusion/substrait/src/logical_plan/producer/expr/scalar_function.rs b/datafusion/substrait/src/logical_plan/producer/expr/scalar_function.rs index bd8a9d9a99b53..9f70e903a0bd9 100644 --- a/datafusion/substrait/src/logical_plan/producer/expr/scalar_function.rs +++ b/datafusion/substrait/src/logical_plan/producer/expr/scalar_function.rs @@ -344,5 +344,6 @@ pub fn operator_to_name(op: Operator) -> &'static str { Operator::BitwiseXor => "bitwise_xor", Operator::BitwiseShiftRight => "bitwise_shift_right", Operator::BitwiseShiftLeft => "bitwise_shift_left", + Operator::Colon => "colon", } } diff --git a/dev/changelog/53.0.0.md b/dev/changelog/53.0.0.md new file mode 100644 index 0000000000000..91306c7f49a6d --- /dev/null +++ b/dev/changelog/53.0.0.md @@ -0,0 +1,602 @@ + + +# Apache DataFusion 53.0.0 Changelog + +This release consists of 447 commits from 105 contributors. See credits at the end of this changelog for more information. + +See the [upgrade guide](https://datafusion.apache.org/library-user-guide/upgrading.html) for information on how to upgrade from previous versions. 
+ +**Breaking changes:** + +- Allow logical optimizer to be run without evaluating now() & refactor SimplifyInfo [#19505](https://github.com/apache/datafusion/pull/19505) (adriangb) +- Make default ListingFilesCache table scoped [#19616](https://github.com/apache/datafusion/pull/19616) (jizezhang) +- chore(deps): Update sqlparser to 0.60 [#19672](https://github.com/apache/datafusion/pull/19672) (Standing-Man) +- Do not require mut in memory reservation methods [#19759](https://github.com/apache/datafusion/pull/19759) (gabotechs) +- refactor: make PhysicalExprAdatperFactory::create fallible [#20017](https://github.com/apache/datafusion/pull/20017) (niebayes) +- Add `ScalarValue::RunEndEncoded` variant [#19895](https://github.com/apache/datafusion/pull/19895) (Jefffrey) +- minor: remove unused crypto functions & narrow public API [#20045](https://github.com/apache/datafusion/pull/20045) (Jefffrey) +- Wrap immutable plan parts into Arc (make creating `ExecutionPlan`s less costly) [#19893](https://github.com/apache/datafusion/pull/19893) (askalt) +- feat: Support planning subqueries with OuterReferenceColumn belongs to non-adjacent outer relations [#19930](https://github.com/apache/datafusion/pull/19930) (mkleen) +- Remove the statistics() api in execution plan [#20319](https://github.com/apache/datafusion/pull/20319) (xudong963) +- Remove recursive const check in `simplify_const_expr` [#20234](https://github.com/apache/datafusion/pull/20234) (AdamGS) +- Cache `PlanProperties`, add fast-path for `with_new_children` [#19792](https://github.com/apache/datafusion/pull/19792) (askalt) + +**Performance related:** + +- perf: optimize `HashTableLookupExpr::evaluate` [#19602](https://github.com/apache/datafusion/pull/19602) (UBarney) +- perf: Improve performance of `split_part` [#19570](https://github.com/apache/datafusion/pull/19570) (andygrove) +- Optimize `Nullstate` / accumulators [#19625](https://github.com/apache/datafusion/pull/19625) (Dandandan) +- perf: optimize 
`NthValue` when `ignore_nulls` is true [#19496](https://github.com/apache/datafusion/pull/19496) (mzabaluev) +- Optimize `concat/concat_ws` scalar path by pre-allocating memory [#19547](https://github.com/apache/datafusion/pull/19547) (lyne7-sc) +- perf: optimize left function by eliminating double chars() iteration [#19571](https://github.com/apache/datafusion/pull/19571) (viirya) +- perf: Optimize floor and ceil scalar performance [#19752](https://github.com/apache/datafusion/pull/19752) (kumarUjjawal) +- perf: improve performance of `spark hex` function [#19738](https://github.com/apache/datafusion/pull/19738) (lyne7-sc) +- perf: Optimize initcap scalar performance [#19776](https://github.com/apache/datafusion/pull/19776) (kumarUjjawal) +- Row group limit pruning for row groups that entirely match predicates [#18868](https://github.com/apache/datafusion/pull/18868) (xudong963) +- perf: Optimize trunc scalar performance [#19788](https://github.com/apache/datafusion/pull/19788) (kumarUjjawal) +- perf: optimize `spark_hex` dictionary path by avoiding dictionary expansion [#19832](https://github.com/apache/datafusion/pull/19832) (lyne7-sc) +- Add FilterExecBuilder to avoid recomputing properties multiple times [#19854](https://github.com/apache/datafusion/pull/19854) (adriangb) +- perf: Optimize round scalar performance [#19831](https://github.com/apache/datafusion/pull/19831) (kumarUjjawal) +- perf: Optimize signum scalar performance with fast path [#19871](https://github.com/apache/datafusion/pull/19871) (kumarUjjawal) +- perf: Optimize scalar performance for cot [#19888](https://github.com/apache/datafusion/pull/19888) (kumarUjjawal) +- perf: Optimize scalar fast path for iszero [#19919](https://github.com/apache/datafusion/pull/19919) (kumarUjjawal) +- Misc hash / hash aggregation performance improvements [#19910](https://github.com/apache/datafusion/pull/19910) (Dandandan) +- perf: Optimize scalar path for ascii function 
[#19951](https://github.com/apache/datafusion/pull/19951) (kumarUjjawal) +- perf: Optimize factorial scalar path [#19949](https://github.com/apache/datafusion/pull/19949) (kumarUjjawal) +- Speedup statistics_from_parquet_metadata [#20004](https://github.com/apache/datafusion/pull/20004) (Dandandan) +- perf: improve performance of `array_remove`, `array_remove_n` and `array_remove_all` functions [#19996](https://github.com/apache/datafusion/pull/19996) (lyne7-sc) +- perf: Optimize ArrowBytesViewMap with direct view access [#19975](https://github.com/apache/datafusion/pull/19975) (Tushar7012) +- perf: Optimize repeat function for scalar and array fast [#19976](https://github.com/apache/datafusion/pull/19976) (kumarUjjawal) +- perf: Push down join key filters for LEFT/RIGHT/ANTI joins [#19918](https://github.com/apache/datafusion/pull/19918) (nuno-faria) +- perf: Optimize scalar path for chr function [#20073](https://github.com/apache/datafusion/pull/20073) (kumarUjjawal) +- perf: improve performance of `array_repeat` function [#20049](https://github.com/apache/datafusion/pull/20049) (lyne7-sc) +- perf: optimise right for byte access and StringView [#20069](https://github.com/apache/datafusion/pull/20069) (theirix) +- Optimize `PhysicalExprSimplifier` [#20111](https://github.com/apache/datafusion/pull/20111) (AdamGS) +- Improve performance of `CASE WHEN x THEN y ELSE NULL` expressions [#20097](https://github.com/apache/datafusion/pull/20097) (pepijnve) +- perf: Optimize scalar fast path of to_hex function [#20112](https://github.com/apache/datafusion/pull/20112) (kumarUjjawal) +- perf: Optimize scalar fast path & write() encoding for sha2 [#20116](https://github.com/apache/datafusion/pull/20116) (kumarUjjawal) +- perf: improve performance of `array_union`/`array_intersect` with batched row conversion [#20243](https://github.com/apache/datafusion/pull/20243) (lyne7-sc) +- perf: various optimizations to eliminate branch misprediction in hash_utils 
[#20168](https://github.com/apache/datafusion/pull/20168) (notashes) +- perf: Optimize strpos() for ASCII-only inputs [#20295](https://github.com/apache/datafusion/pull/20295) (neilconway) +- perf: Optimize compare_element_to_list [#20323](https://github.com/apache/datafusion/pull/20323) (neilconway) +- perf: Optimize replace() fastpath by avoiding alloc [#20344](https://github.com/apache/datafusion/pull/20344) (neilconway) +- perf: optimize `array_distinct` with batched row conversion [#20364](https://github.com/apache/datafusion/pull/20364) (lyne7-sc) +- perf: Optimize scalar fast path of atan2 [#20336](https://github.com/apache/datafusion/pull/20336) (kumarUjjawal) +- perf: Optimize concat()/concat_ws() UDFs [#20317](https://github.com/apache/datafusion/pull/20317) (neilconway) +- perf: Optimize translate() UDF for scalar inputs [#20305](https://github.com/apache/datafusion/pull/20305) (neilconway) +- perf: Optimize `array_has()` for scalar needle [#20374](https://github.com/apache/datafusion/pull/20374) (neilconway) +- perf: Optimize lpad, rpad for ASCII strings [#20278](https://github.com/apache/datafusion/pull/20278) (neilconway) +- perf: Optimize trim UDFs for single-character trims [#20328](https://github.com/apache/datafusion/pull/20328) (neilconway) +- perf: Optimize scalar fast path for `regexp_like` and rejects g inside combined flags like ig [#20354](https://github.com/apache/datafusion/pull/20354) (kumarUjjawal) +- perf: Use zero-copy slice instead of take kernel in sort merge join [#20463](https://github.com/apache/datafusion/pull/20463) (andygrove) +- perf: Optimize `initcap()` [#20352](https://github.com/apache/datafusion/pull/20352) (neilconway) +- perf: Fix quadratic behavior of `to_array_of_size` [#20459](https://github.com/apache/datafusion/pull/20459) (neilconway) +- perf: Optimize `array_has_any()` with scalar arg [#20385](https://github.com/apache/datafusion/pull/20385) (neilconway) +- perf: Use Hashbrown for array_distinct 
[#20538](https://github.com/apache/datafusion/pull/20538) (neilconway) +- perf: Cache num_output_rows in sort merge join to avoid O(n) recount [#20478](https://github.com/apache/datafusion/pull/20478) (andygrove) +- perf: Optimize heap handling in TopK operator [#20556](https://github.com/apache/datafusion/pull/20556) (AdamGS) +- perf: Optimize `array_position` for scalar needle [#20532](https://github.com/apache/datafusion/pull/20532) (neilconway) +- perf: Use Arrow vectorized eq kernel for IN list with column references [#20528](https://github.com/apache/datafusion/pull/20528) (zhangxffff) +- perf: Optimize `array_agg()` using `GroupsAccumulator` [#20504](https://github.com/apache/datafusion/pull/20504) (neilconway) +- perf: Optimize `array_to_string()`, support more types [#20553](https://github.com/apache/datafusion/pull/20553) (neilconway) + +**Implemented enhancements:** + +- feat: add list_files_cache table function for `datafusion-cli` [#19388](https://github.com/apache/datafusion/pull/19388) (jizezhang) +- feat: implement metrics for AsyncFuncExec [#19626](https://github.com/apache/datafusion/pull/19626) (feniljain) +- feat: split BatchPartitioner::try_new into hash and round-robin constructors [#19668](https://github.com/apache/datafusion/pull/19668) (mohit7705) +- feat: add Time type support to date_trunc function [#19640](https://github.com/apache/datafusion/pull/19640) (kumarUjjawal) +- feat: Allow log with non-integer base on decimals [#19372](https://github.com/apache/datafusion/pull/19372) (Yuvraj-cyborg) +- feat(spark): implement array_repeat function [#19702](https://github.com/apache/datafusion/pull/19702) (cht42) +- feat(spark): Implement collect_list/collect_set aggregate functions [#19699](https://github.com/apache/datafusion/pull/19699) (cht42) +- feat: implement Spark size function for arrays and maps [#19592](https://github.com/apache/datafusion/pull/19592) (CuteChuanChuan) +- feat: support Set Comparison Subquery 
[#19109](https://github.com/apache/datafusion/pull/19109) (waynexia) +- feat(spark): implement array slice function [#19811](https://github.com/apache/datafusion/pull/19811) (cht42) +- feat(spark): implement substring function [#19805](https://github.com/apache/datafusion/pull/19805) (cht42) +- feat: Add support for 'isoyear' in date_part function [#19821](https://github.com/apache/datafusion/pull/19821) (cht42) +- feat: support `SELECT DISTINCT id FROM t ORDER BY id LIMIT n` query use GroupedTopKAggregateStream [#19653](https://github.com/apache/datafusion/pull/19653) (haohuaijin) +- feat(spark): add trunc, date_trunc and time_trunc functions [#19829](https://github.com/apache/datafusion/pull/19829) (cht42) +- feat(spark): implement Spark `date_diff` function [#19845](https://github.com/apache/datafusion/pull/19845) (cht42) +- feat(spark): implement add_months function [#19711](https://github.com/apache/datafusion/pull/19711) (cht42) +- feat: support pushdown alias on dynamic filter with `ProjectionExec` [#19404](https://github.com/apache/datafusion/pull/19404) (discord9) +- feat(spark): add `base64` and `unbase64` functions [#19968](https://github.com/apache/datafusion/pull/19968) (cht42) +- feat: Show the number of matched Parquet pages in `DataSourceExec` [#19977](https://github.com/apache/datafusion/pull/19977) (nuno-faria) +- feat(spark): Add `SessionStateBuilderSpark` to datafusion-spark [#19865](https://github.com/apache/datafusion/pull/19865) (cht42) +- feat(spark): implement `from/to_utc_timestamp` functions [#19880](https://github.com/apache/datafusion/pull/19880) (cht42) +- feat(spark): implement `StringView` for `SparkConcat` [#19984](https://github.com/apache/datafusion/pull/19984) (aryan-212) +- feat(spark): add unix date and timestamp functions [#19892](https://github.com/apache/datafusion/pull/19892) (cht42) +- feat: implement protobuf converter trait to allow control over serialization and deserialization processes 
[#19437](https://github.com/apache/datafusion/pull/19437) (timsaucer) +- feat: optimise copying in `left` for Utf8 and LargeUtf8 [#19980](https://github.com/apache/datafusion/pull/19980) (theirix) +- feat: support Spark-compatible abs math function part 2 - ANSI mode [#18828](https://github.com/apache/datafusion/pull/18828) (hsiang-c) +- feat: add AggregateMode::PartialReduce for tree-reduce aggregation [#20019](https://github.com/apache/datafusion/pull/20019) (njsmith) +- feat: add ExpressionPlacement enum for optimizer expression placement decisions [#20065](https://github.com/apache/datafusion/pull/20065) (adriangb) +- feat: support f16 in coercion logic [#18944](https://github.com/apache/datafusion/pull/18944) (Jefffrey) +- feat: unify left and right functions and benches [#20114](https://github.com/apache/datafusion/pull/20114) (theirix) +- feat(spark): Adds negative spark function [#20006](https://github.com/apache/datafusion/pull/20006) (SubhamSinghal) +- feat: support limited deletion [#20137](https://github.com/apache/datafusion/pull/20137) (askalt) +- feat: Pushdown filters through `UnionExec` nodes [#20145](https://github.com/apache/datafusion/pull/20145) (haohuaijin) +- feat: support Spark-compatible `string_to_map` function [#20120](https://github.com/apache/datafusion/pull/20120) (unknowntpo) +- feat: Add `partition_stats()` for `EmptyExec` [#20203](https://github.com/apache/datafusion/pull/20203) (jonathanc-n) +- feat: add ExtractLeafExpressions optimizer rule for get_field pushdown [#20117](https://github.com/apache/datafusion/pull/20117) (adriangb) +- feat: Push limit into hash join [#20228](https://github.com/apache/datafusion/pull/20228) (jonathanc-n) +- feat: Optimize hash util for `MapArray` [#20179](https://github.com/apache/datafusion/pull/20179) (jonathanc-n) +- feat: Implement Spark `bitmap_bit_position` function [#20275](https://github.com/apache/datafusion/pull/20275) (kazantsev-maksim) +- feat: support sqllogictest output coloring 
[#20368](https://github.com/apache/datafusion/pull/20368) (theirix) +- feat: support Spark-compatible `json_tuple` function [#20412](https://github.com/apache/datafusion/pull/20412) (CuteChuanChuan) +- feat: Implement Spark `bitmap_bucket_number` function [#20288](https://github.com/apache/datafusion/pull/20288) (kazantsev-maksim) +- feat: support `arrays_zip` function [#20440](https://github.com/apache/datafusion/pull/20440) (comphead) +- feat: Implement Spark `bin` function [#20479](https://github.com/apache/datafusion/pull/20479) (kazantsev-maksim) +- feat: support extension planner for `TableScan` [#20548](https://github.com/apache/datafusion/pull/20548) (linhr) + +**Fixed bugs:** + +- fix: Return Int for Date - Date instead of duration [#19563](https://github.com/apache/datafusion/pull/19563) (kumarUjjawal) +- fix: DynamicFilterPhysicalExpr violates Hash/Eq contract [#19659](https://github.com/apache/datafusion/pull/19659) (kumarUjjawal) +- fix: unnest struct field with an alias failed with internal error [#19698](https://github.com/apache/datafusion/pull/19698) (kumarUjjawal) +- fix(accumulators): preserve state in evaluate() for window frame queries [#19618](https://github.com/apache/datafusion/pull/19618) (GaneshPatil7517) +- fix: Don't treat quoted column names as placeholder variables in SQL [#19339](https://github.com/apache/datafusion/pull/19339) (pmallex) +- fix: enhance CTE resolution with identifier normalization [#19519](https://github.com/apache/datafusion/pull/19519) (kysshsy) +- feat: Add null-aware anti join support [#19635](https://github.com/apache/datafusion/pull/19635) (viirya) +- fix: expose `ListFilesEntry` [#19804](https://github.com/apache/datafusion/pull/19804) (lonless9) +- fix: trunc function with precision uses round instead of trunc semantics [#19794](https://github.com/apache/datafusion/pull/19794) (kumarUjjawal) +- fix: calculate total seconds from interval fields for `extract(epoch)` 
[#19807](https://github.com/apache/datafusion/pull/19807) (lemorage) +- fix: predicate cache stats calculation [#19561](https://github.com/apache/datafusion/pull/19561) (feniljain) +- fix: preserve state in DistinctMedianAccumulator::evaluate() for window frame queries [#19887](https://github.com/apache/datafusion/pull/19887) (kumarUjjawal) +- fix: null in array_agg with DISTINCT and IGNORE [#19736](https://github.com/apache/datafusion/pull/19736) (davidlghellin) +- fix: union should return error instead of panic when input schema's len different [#19922](https://github.com/apache/datafusion/pull/19922) (haohuaijin) +- fix: change token consumption to pick to test on EOF in parser [#19927](https://github.com/apache/datafusion/pull/19927) (askalt) +- fix: maintain inner list nullability for `array_sort` [#19948](https://github.com/apache/datafusion/pull/19948) (Jefffrey) +- fix: Make `generate_series` return an empty set with invalid ranges [#19999](https://github.com/apache/datafusion/pull/19999) (nuno-faria) +- fix: return correct length array for scalar null input to `calculate_binary_math` [#19861](https://github.com/apache/datafusion/pull/19861) (Jefffrey) +- fix: respect DataFrameWriteOptions::with_single_file_output for paths without extensions [#19931](https://github.com/apache/datafusion/pull/19931) (kumarUjjawal) +- fix: correct weight handling in approx_percentile_cont_with_weight [#19941](https://github.com/apache/datafusion/pull/19941) (sesteves) +- fix: The limit_pushdown physical optimization rule removes limits in some cases leading to incorrect results [#20048](https://github.com/apache/datafusion/pull/20048) (masonh22) +- Add duplicate name error reproducer [#20106](https://github.com/apache/datafusion/pull/20106) (gabotechs) +- fix: filter pushdown when merge filter [#20110](https://github.com/apache/datafusion/pull/20110) (haohuaijin) +- fix: Make `serialize_to_file` test cross platform [#20147](https://github.com/apache/datafusion/pull/20147) 
(nuno-faria) +- fix: regression of `dict_id` in physical plan proto [#20063](https://github.com/apache/datafusion/pull/20063) (kumarUjjawal) +- fix: panic in ListingTableFactory when session is not SessionState [#20139](https://github.com/apache/datafusion/pull/20139) (evangelisilva) +- fix: update comment on FilterPushdownPropagation [#20040](https://github.com/apache/datafusion/pull/20040) (niebayes) +- fix: datatype_is_logically_equal for dictionaries [#20153](https://github.com/apache/datafusion/pull/20153) (dd-annarose) +- fix: Avoid integer overflow in split_part() [#20198](https://github.com/apache/datafusion/pull/20198) (neilconway) +- fix: Fix panic in regexp_like() [#20200](https://github.com/apache/datafusion/pull/20200) (neilconway) +- fix: Handle NULL inputs correctly in find_in_set() [#20209](https://github.com/apache/datafusion/pull/20209) (neilconway) +- fix: Ensure columns are casted to the correct names with Unions [#20146](https://github.com/apache/datafusion/pull/20146) (nuno-faria) +- fix: Avoid assertion failure on divide-by-zero [#20216](https://github.com/apache/datafusion/pull/20216) (neilconway) +- fix: Throw coercion error for `LIKE` operations for nested types. 
[#20212](https://github.com/apache/datafusion/pull/20212) (jonathanc-n) +- fix: disable dynamic filter pushdown for non min/max aggregates [#20279](https://github.com/apache/datafusion/pull/20279) (notashes) +- fix: Avoid integer overflow in substr() [#20199](https://github.com/apache/datafusion/pull/20199) (neilconway) +- fix: Fix scalar broadcast for to_timestamp() [#20224](https://github.com/apache/datafusion/pull/20224) (neilconway) +- fix: Add integer check for bitwise coercion [#20241](https://github.com/apache/datafusion/pull/20241) (Acfboy) +- fix: percentile_cont interpolation causes NaN for f16 input [#20208](https://github.com/apache/datafusion/pull/20208) (kumarUjjawal) +- fix: validate inter-file ordering in eq_properties() [#20329](https://github.com/apache/datafusion/pull/20329) (adriangb) +- fix: update filter predicates for min/max aggregates only if bounds change [#20380](https://github.com/apache/datafusion/pull/20380) (notashes) +- fix: Handle Utf8View and LargeUtf8 separators in concat_ws [#20361](https://github.com/apache/datafusion/pull/20361) (neilconway) +- fix: HashJoin panic with dictionary-encoded columns in multi-key joins [#20441](https://github.com/apache/datafusion/pull/20441) (Tim-53) +- fix: handle out of range errors in DATE_BIN instead of panicking [#20221](https://github.com/apache/datafusion/pull/20221) (mishop-15) +- fix: prevent duplicate alias collision with user-provided \_\_datafusion_extracted names [#20432](https://github.com/apache/datafusion/pull/20432) (adriangb) +- fix: SortMergeJoin don't wait for all input before emitting [#20482](https://github.com/apache/datafusion/pull/20482) (rluvaton) +- fix: `cardinality()` of an empty array should be zero [#20533](https://github.com/apache/datafusion/pull/20533) (neilconway) +- fix: Unaccounted spill sort in row_hash [#20314](https://github.com/apache/datafusion/pull/20314) (EmilyMatt) +- fix: IS NULL panic with invalid function without input arguments 
[#20306](https://github.com/apache/datafusion/pull/20306) (Acfboy) +- fix: handle empty delimiter in split_part (closes #20503) [#20542](https://github.com/apache/datafusion/pull/20542) (gferrate) +- fix(substrait): Correctly parse field references in subqueries [#20439](https://github.com/apache/datafusion/pull/20439) (neilconway) +- fix: increase ROUND decimal precision to prevent overflow truncation [#19926](https://github.com/apache/datafusion/pull/19926) (kumarUjjawal) +- fix: Fix `array_to_string` with columnar third arg [#20536](https://github.com/apache/datafusion/pull/20536) (neilconway) +- fix: Fix and Refactor Spark `shuffle` function [#20484](https://github.com/apache/datafusion/pull/20484) (erenavsarogullari) + +**Documentation updates:** + +- perfect hash join [#19411](https://github.com/apache/datafusion/pull/19411) (UBarney) +- docs: Fix two small issues in introduction.md [#19712](https://github.com/apache/datafusion/pull/19712) (AdamGS) +- docs: Refine Communication documentation to highlight Discord [#19714](https://github.com/apache/datafusion/pull/19714) (alamb) +- chore(deps): bump maturin from 1.10.2 to 1.11.5 in /docs [#19740](https://github.com/apache/datafusion/pull/19740) (dependabot[bot]) +- chore: remove LZO Parquet compression [#19726](https://github.com/apache/datafusion/pull/19726) (kumarUjjawal) +- Update 52.0.0 release version number and changelog [#19767](https://github.com/apache/datafusion/pull/19767) (xudong963) +- Update the upgrading.md [#19769](https://github.com/apache/datafusion/pull/19769) (xudong963) +- chore: update copyright notice year [#19758](https://github.com/apache/datafusion/pull/19758) (Jefffrey) +- doc: Add an auto-generated dependency graph for internal crates [#19280](https://github.com/apache/datafusion/pull/19280) (2010YOUY01) +- Docs: Fix some links in docs [#19834](https://github.com/apache/datafusion/pull/19834) (alamb) +- Docs: add additional links to blog posts 
[#19833](https://github.com/apache/datafusion/pull/19833) (alamb) +- Ensure null inputs to array setop functions return null output [#19683](https://github.com/apache/datafusion/pull/19683) (Jefffrey) +- chore(deps): bump sphinx from 8.2.3 to 9.1.0 in /docs [#19647](https://github.com/apache/datafusion/pull/19647) (dependabot[bot]) +- Fix struct casts to align fields by name (prevent positional mis-casts) [#19674](https://github.com/apache/datafusion/pull/19674) (kosiew) +- chore(deps): bump setuptools from 80.9.0 to 80.10.1 in /docs [#19988](https://github.com/apache/datafusion/pull/19988) (dependabot[bot]) +- minor: Fix doc about `write_batch_size` [#19979](https://github.com/apache/datafusion/pull/19979) (nuno-faria) +- Fix broken links in the documentation [#19964](https://github.com/apache/datafusion/pull/19964) (alamb) +- minor: Add favicon [#20000](https://github.com/apache/datafusion/pull/20000) (nuno-faria) +- docs: Fix some broken / missing links in the DataFusion documentation [#19958](https://github.com/apache/datafusion/pull/19958) (alamb) +- chore(deps): bump setuptools from 80.10.1 to 80.10.2 in /docs [#20022](https://github.com/apache/datafusion/pull/20022) (dependabot[bot]) +- docs: Automatically update DataFusion version in docs [#20001](https://github.com/apache/datafusion/pull/20001) (nuno-faria) +- docs: update data_types.md to reflect current Arrow type mappings [#20072](https://github.com/apache/datafusion/pull/20072) (karuppuchamysuresh) +- Runs-on for `linux-build-lib` and `linux-test` (2X faster CI) [#20107](https://github.com/apache/datafusion/pull/20107) (blaginin) +- Disallow positional struct casting when field names don’t overlap [#19955](https://github.com/apache/datafusion/pull/19955) (kosiew) +- docs: fix docstring formatting [#20158](https://github.com/apache/datafusion/pull/20158) (Jefffrey) +- Break upgrade guides into separate pages [#20183](https://github.com/apache/datafusion/pull/20183) (mishop-15) +- Better document the 
relationship between `FileFormat::projection` / `FileFormat::filter` and `FileScanConfig::Statistics` [#20188](https://github.com/apache/datafusion/pull/20188) (alamb) +- Document the relationship between FileFormat::projection / FileFormat::filter and FileScanConfig::output_ordering [#20196](https://github.com/apache/datafusion/pull/20196) (alamb) +- More documentation on `FileSource::table_schema` and `FileSource::projection` [#20242](https://github.com/apache/datafusion/pull/20242) (alamb) +- chore(deps): bump setuptools from 80.10.2 to 82.0.0 in /docs [#20255](https://github.com/apache/datafusion/pull/20255) (dependabot[bot]) +- docs: fix typos and improve wording in README [#20301](https://github.com/apache/datafusion/pull/20301) (iampratap7997-dot) +- Reduce ExtractLeafExpressions optimizer overhead with fast pre-scan [#20341](https://github.com/apache/datafusion/pull/20341) (adriangb) +- chore(deps): bump maturin from 1.11.5 to 1.12.2 in /docs [#20400](https://github.com/apache/datafusion/pull/20400) (dependabot[bot]) +- Migrate Python usage to uv workspace [#20414](https://github.com/apache/datafusion/pull/20414) (adriangb) +- test: Extend Spark Array functions: `array_repeat `, `shuffle` and `slice` test coverage [#20420](https://github.com/apache/datafusion/pull/20420) (erenavsarogullari) +- Runs-on for more actions [#20274](https://github.com/apache/datafusion/pull/20274) (blaginin) +- docs: Document that adding new optimizer rules are expensive [#20348](https://github.com/apache/datafusion/pull/20348) (alamb) +- add redirect for old upgrading.html URL to fix broken changelog links [#20582](https://github.com/apache/datafusion/pull/20582) (mishop-15) +- Upgrade DataFusion to arrow-rs/parquet 58.0.0 / `object_store` 0.13.0 [#19728](https://github.com/apache/datafusion/pull/19728) (alamb) +- Document guidance on how to evaluate breaking API changes [#20584](https://github.com/apache/datafusion/pull/20584) (alamb) + +**Other:** + +- Add a protection to 
release candidate branch 52 [#19660](https://github.com/apache/datafusion/pull/19660) (xudong963) +- Downgrade aws-smithy-runtime, update `rust_decimal`, ignore RUSTSEC-2026-0001 to get clean CI [#19657](https://github.com/apache/datafusion/pull/19657) (alamb) +- Update dependencies [#19667](https://github.com/apache/datafusion/pull/19667) (alamb) +- Refactor PartitionedFile: add ordering field and new_from_meta constructor [#19596](https://github.com/apache/datafusion/pull/19596) (adriangb) +- Remove coalesce batches rule and deprecate CoalesceBatchesExec [#19622](https://github.com/apache/datafusion/pull/19622) (feniljain) +- Perf: Optimize `substring_index` via single-byte fast path and direct indexing [#19590](https://github.com/apache/datafusion/pull/19590) (lyne7-sc) +- refactor: Use `Signature::coercible` for isnan/iszero [#19604](https://github.com/apache/datafusion/pull/19604) (kumarUjjawal) +- Parquet: Push down supported list predicates (array_has/any/all) during decoding [#19545](https://github.com/apache/datafusion/pull/19545) (kosiew) +- Remove dependency on `rust_decimal`, remove ignore of `RUSTSEC-2026-0001` [#19666](https://github.com/apache/datafusion/pull/19666) (alamb) +- Store example data directly inside the datafusion-examples (#19141) [#19319](https://github.com/apache/datafusion/pull/19319) (cj-zhukov) +- minor: More comments to `ParquetOpener::open()` [#19677](https://github.com/apache/datafusion/pull/19677) (2010YOUY01) +- Feat: Allow pow with negative & non-integer exponent on decimals [#19369](https://github.com/apache/datafusion/pull/19369) (Yuvraj-cyborg) +- chore(deps): bump taiki-e/install-action from 2.65.13 to 2.65.15 [#19676](https://github.com/apache/datafusion/pull/19676) (dependabot[bot]) +- Refactor cache APIs to support ordering information [#19597](https://github.com/apache/datafusion/pull/19597) (adriangb) +- Record sort order when writing Parquet with WITH ORDER [#19595](https://github.com/apache/datafusion/pull/19595) 
(adriangb) +- implement var distinct [#19706](https://github.com/apache/datafusion/pull/19706) (thinh2) +- Fix TopK aggregation for UTF-8/Utf8View group keys and add safe fallback for unsupported string aggregates [#19285](https://github.com/apache/datafusion/pull/19285) (kosiew) +- infer parquet file order from metadata and use it to optimize scans [#19433](https://github.com/apache/datafusion/pull/19433) (adriangb) +- Add support for additional numeric types in to_timestamp functions [#19663](https://github.com/apache/datafusion/pull/19663) (gokselk) +- Fix internal error "Physical input schema should be the same as the one converted from logical input schema." [#18412](https://github.com/apache/datafusion/pull/18412) (alamb) +- fix(functions-aggregate): drain CORR state vectors for streaming aggregation [#19669](https://github.com/apache/datafusion/pull/19669) (geoffreyclaude) +- chore: bump dependabot PR limit for cargo from 5 to 15 [#19730](https://github.com/apache/datafusion/pull/19730) (Jefffrey) +- chore(deps): bump taiki-e/install-action from 2.65.15 to 2.66.1 [#19741](https://github.com/apache/datafusion/pull/19741) (dependabot[bot]) +- chore(deps): bump sqllogictest from 0.28.4 to 0.29.0 [#19744](https://github.com/apache/datafusion/pull/19744) (dependabot[bot]) +- chore(deps): bump blake3 from 1.8.2 to 1.8.3 [#19746](https://github.com/apache/datafusion/pull/19746) (dependabot[bot]) +- chore(deps): bump libc from 0.2.179 to 0.2.180 [#19748](https://github.com/apache/datafusion/pull/19748) (dependabot[bot]) +- chore(deps): bump async-compression from 0.4.36 to 0.4.37 [#19742](https://github.com/apache/datafusion/pull/19742) (dependabot[bot]) +- chore(deps): bump indexmap from 2.12.1 to 2.13.0 [#19747](https://github.com/apache/datafusion/pull/19747) (dependabot[bot]) +- Improve comment for predicate_cache_inner_records [#19762](https://github.com/apache/datafusion/pull/19762) (xudong963) +- Fix dynamic filter is_used function 
[#19734](https://github.com/apache/datafusion/pull/19734) (LiaCastaneda) +- slt: Add test for REE arrays in group by [#19763](https://github.com/apache/datafusion/pull/19763) (brancz) +- Fix run_tpcds data dir [#19771](https://github.com/apache/datafusion/pull/19771) (gabotechs) +- chore(deps): bump taiki-e/install-action from 2.66.1 to 2.66.2 [#19778](https://github.com/apache/datafusion/pull/19778) (dependabot[bot]) +- Include .proto files in datafusion-proto distribution [#19490](https://github.com/apache/datafusion/pull/19490) (DarkWanderer) +- Simplify `expr = L1 AND expr != L2` to `expr = L1` when `L1 != L2` [#19731](https://github.com/apache/datafusion/pull/19731) (simonvandel) +- chore(deps): bump flate2 from 1.1.5 to 1.1.8 [#19780](https://github.com/apache/datafusion/pull/19780) (dependabot[bot]) +- Upgrade DataFusion to arrow-rs/parquet 57.2.0 [#19355](https://github.com/apache/datafusion/pull/19355) (alamb) +- Expose Spilling Progress Interface in DataFusion [#19708](https://github.com/apache/datafusion/pull/19708) (xudong963) +- dev: Add a script to auto fix all lint violations [#19560](https://github.com/apache/datafusion/pull/19560) (2010YOUY01) +- refactor: Optimize `required_columns` from `BTreeSet` to `Vec` in struct `PushdownChecker` [#19678](https://github.com/apache/datafusion/pull/19678) (kumarUjjawal) +- Revert Workround for Empty FixedSizeBinary Values Buffer After arrow-rs Upgrade [#19801](https://github.com/apache/datafusion/pull/19801) (tobixdev) +- chore(deps): bump taiki-e/install-action from 2.66.2 to 2.66.3 [#19802](https://github.com/apache/datafusion/pull/19802) (dependabot[bot]) +- Add Reproducer for Issues with LEFT joins on Fixed Size Binary Columns [#19800](https://github.com/apache/datafusion/pull/19800) (tobixdev) +- Improvements to `list_files_cache` table function [#19703](https://github.com/apache/datafusion/pull/19703) (alamb) +- Issue 19781 : Internal error: Assertion failed: !self.finished: LimitedBatchCoalescer 
[#19785](https://github.com/apache/datafusion/pull/19785) (bert-beyondloops) +- physical plan: add `reset_plan_states`, plan re-use benchmark [#19806](https://github.com/apache/datafusion/pull/19806) (askalt) +- chore(deps): bump actions/setup-node from 6.1.0 to 6.2.0 [#19825](https://github.com/apache/datafusion/pull/19825) (dependabot[bot]) +- Use correct setting for click bench queries in sql_planner benchmark [#19835](https://github.com/apache/datafusion/pull/19835) (alamb) +- chore(deps): bump taiki-e/install-action from 2.66.3 to 2.66.5 [#19824](https://github.com/apache/datafusion/pull/19824) (dependabot[bot]) +- chore: refactor scalarvalue/encoding using available upstream arrow-rs methods [#19797](https://github.com/apache/datafusion/pull/19797) (Jefffrey) +- Refactor Spark `date_add`/`date_sub`/`bitwise_not` to remove unnecessary scalar arg check [#19473](https://github.com/apache/datafusion/pull/19473) (Jefffrey) +- Add BatchAdapter to simplify using PhysicalExprAdapter / Projector to map RecordBatch between schemas [#19716](https://github.com/apache/datafusion/pull/19716) (adriangb) +- [Minor] Reuse indices buffer in RepartitionExec [#19775](https://github.com/apache/datafusion/pull/19775) (Dandandan) +- Fix(optimizer): Make `EnsureCooperative` optimizer idempotent under multiple runs [#19757](https://github.com/apache/datafusion/pull/19757) (danielhumanmod) +- Allow dropping qualified columns [#19549](https://github.com/apache/datafusion/pull/19549) (ntjohnson1) +- Doc: Add more blog links to doc comments [#19837](https://github.com/apache/datafusion/pull/19837) (alamb) +- datafusion/common: Add support for hashing ListView arrays [#19814](https://github.com/apache/datafusion/pull/19814) (brancz) +- Project sort expressions in StreamingTable [#19719](https://github.com/apache/datafusion/pull/19719) (timsaucer) +- Fix grouping set subset satisfaction [#19853](https://github.com/apache/datafusion/pull/19853) (freakyzoidberg) +- Spark date part 
[#19823](https://github.com/apache/datafusion/pull/19823) (cht42) +- chore(deps): bump wasm-bindgen-test from 0.3.56 to 0.3.58 [#19898](https://github.com/apache/datafusion/pull/19898) (dependabot[bot]) +- chore(deps): bump tokio-postgres from 0.7.15 to 0.7.16 [#19899](https://github.com/apache/datafusion/pull/19899) (dependabot[bot]) +- chore(deps): bump postgres-types from 0.2.11 to 0.2.12 [#19902](https://github.com/apache/datafusion/pull/19902) (dependabot[bot]) +- chore(deps): bump insta from 1.46.0 to 1.46.1 [#19901](https://github.com/apache/datafusion/pull/19901) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.66.5 to 2.66.7 [#19883](https://github.com/apache/datafusion/pull/19883) (dependabot[bot]) +- Consolidate cte_quoted_reference.slt into cte.slt [#19862](https://github.com/apache/datafusion/pull/19862) (AnjaliChoudhary99) +- Disable failing `array_union` edge-case with nested null array [#19904](https://github.com/apache/datafusion/pull/19904) (Jefffrey) +- chore(deps): bump the proto group across 1 directory with 5 updates [#19745](https://github.com/apache/datafusion/pull/19745) (dependabot[bot]) +- test(wasmtest): enable compression feature for wasm build [#19860](https://github.com/apache/datafusion/pull/19860) (ChanTsune) +- Feat: added truncate table support [#19633](https://github.com/apache/datafusion/pull/19633) (Nachiket-Roy) +- Remove UDAF manual Debug impls and simplify signatures [#19727](https://github.com/apache/datafusion/pull/19727) (Jefffrey) +- chore(deps): bump thiserror from 2.0.17 to 2.0.18 [#19900](https://github.com/apache/datafusion/pull/19900) (dependabot[bot]) +- Include license and notice files in more crates [#19913](https://github.com/apache/datafusion/pull/19913) (ankane) +- chore(deps): bump actions/setup-python from 6.1.0 to 6.2.0 [#19935](https://github.com/apache/datafusion/pull/19935) (dependabot[bot]) +- Coerce expressions to udtf [#19915](https://github.com/apache/datafusion/pull/19915) 
(XiangpengHao) +- Fix trailing whitespace in CROSS JOIN logical plan formatting [#19936](https://github.com/apache/datafusion/pull/19936) (mkleen) +- chore(deps): bump chrono from 0.4.42 to 0.4.43 [#19897](https://github.com/apache/datafusion/pull/19897) (dependabot[bot]) +- Improve error message when string functions receive Binary types [#19819](https://github.com/apache/datafusion/pull/19819) (lemorage) +- Refactor ListArray hashing to consider only sliced values [#19500](https://github.com/apache/datafusion/pull/19500) (Jefffrey) +- feat(datafusion-spark): implement spark compatible `unhex` function [#19909](https://github.com/apache/datafusion/pull/19909) (lyne7-sc) +- Support API for "pre-image" for pruning predicate evaluation [#19722](https://github.com/apache/datafusion/pull/19722) (sdf-jkl) +- Support LargeUtf8 as partition column [#19942](https://github.com/apache/datafusion/pull/19942) (paleolimbot) +- chore(deps): bump actions/checkout from 6.0.1 to 6.0.2 [#19953](https://github.com/apache/datafusion/pull/19953) (dependabot[bot]) +- preserve FilterExec batch size during ser/de [#19960](https://github.com/apache/datafusion/pull/19960) (askalt) +- Add struct pushdown query benchmark and projection pushdown tests [#19962](https://github.com/apache/datafusion/pull/19962) (adriangb) +- Improve error messages with nicer formatting of Date and Time types [#19954](https://github.com/apache/datafusion/pull/19954) (emilk) +- export `SessionState::register_catalog_list(...)` [#19925](https://github.com/apache/datafusion/pull/19925) (askalt) +- Change GitHub actions dependabot schedule to weekly [#19981](https://github.com/apache/datafusion/pull/19981) (Jefffrey) +- chore(deps): bump taiki-e/install-action from 2.66.7 to 2.67.9 [#19987](https://github.com/apache/datafusion/pull/19987) (dependabot[bot]) +- chore(deps): bump quote from 1.0.43 to 1.0.44 [#19992](https://github.com/apache/datafusion/pull/19992) (dependabot[bot]) +- chore(deps): bump nix from 0.30.1 to 
0.31.1 [#19991](https://github.com/apache/datafusion/pull/19991) (dependabot[bot]) +- chore(deps): bump sysinfo from 0.37.2 to 0.38.0 [#19990](https://github.com/apache/datafusion/pull/19990) (dependabot[bot]) +- chore(deps): bump uuid from 1.19.0 to 1.20.0 [#19993](https://github.com/apache/datafusion/pull/19993) (dependabot[bot]) +- minor: pull `uuid` into workspace dependencies [#19997](https://github.com/apache/datafusion/pull/19997) (Jefffrey) +- Fix ClickBench EventDate handling by casting UInt16 days-since-epoch to DATE via `hits` view [#19881](https://github.com/apache/datafusion/pull/19881) (kosiew) +- refactor: extract pushdown test utilities to shared module [#20010](https://github.com/apache/datafusion/pull/20010) (adriangb) +- chore(deps): bump taiki-e/install-action from 2.67.9 to 2.67.13 [#20020](https://github.com/apache/datafusion/pull/20020) (dependabot[bot]) +- add more projection pushdown slt tests [#20015](https://github.com/apache/datafusion/pull/20015) (adriangb) +- minor: Move metric `page_index_rows_pruned` to verbose level in `EXPLAIN ANALYZE` [#20026](https://github.com/apache/datafusion/pull/20026) (2010YOUY01) +- Tweak `adapter serialization` example [#20035](https://github.com/apache/datafusion/pull/20035) (adriangb) +- Simplify wait_complete function [#19937](https://github.com/apache/datafusion/pull/19937) (LiaCastaneda) +- [main] Update version to `52.1.0` (#19878) [#20028](https://github.com/apache/datafusion/pull/20028) (alamb) +- Fix/parquet opener page index policy [#19890](https://github.com/apache/datafusion/pull/19890) (aviralgarg05) +- minor: add tests for coercible signature considering nulls/dicts/ree [#19459](https://github.com/apache/datafusion/pull/19459) (Jefffrey) +- Enforce `clippy::allow_attributes` globally across workspace [#19576](https://github.com/apache/datafusion/pull/19576) (Jefffrey) +- Fix constant value from stats [#20042](https://github.com/apache/datafusion/pull/20042) (gabotechs) +- Simplify Spark 
`sha2` implementation [#19475](https://github.com/apache/datafusion/pull/19475) (Jefffrey) +- Further refactoring of type coercion function code [#19603](https://github.com/apache/datafusion/pull/19603) (Jefffrey) +- replace private is_volatile_expression_tree with equivalent public is_volatile [#20056](https://github.com/apache/datafusion/pull/20056) (adriangb) +- Improve documentation for ScalarUDFImpl::preimage [#20008](https://github.com/apache/datafusion/pull/20008) (alamb) +- Use BooleanBufferBuilder rather than Vec in ArrowBytesViewMap [#20064](https://github.com/apache/datafusion/pull/20064) (etk18) +- chore: Add microbenchmark (compared to ExprOrExpr) [#20076](https://github.com/apache/datafusion/pull/20076) (CuteChuanChuan) +- Minor: update tests in limit_pushdown.rs to insta [#20066](https://github.com/apache/datafusion/pull/20066) (alamb) +- Reduce number of traversals per node in `PhysicalExprSimplifier` [#20082](https://github.com/apache/datafusion/pull/20082) (AdamGS) +- Automatically generate examples documentation adv (#19294) [#19750](https://github.com/apache/datafusion/pull/19750) (cj-zhukov) +- Implement preimage for floor function to enable predicate pushdown [#20059](https://github.com/apache/datafusion/pull/20059) (devanshu0987) +- Refactor `iszero()` and `isnan()` to accept all numeric types [#20093](https://github.com/apache/datafusion/pull/20093) (kumarUjjawal) +- Use return_field_from_args in information schema and date_trunc [#20079](https://github.com/apache/datafusion/pull/20079) (AndreaBozzo) +- Preserve PhysicalExpr graph in proto round trip using Arc pointers as unique identifiers [#20037](https://github.com/apache/datafusion/pull/20037) (adriangb) +- add ability to customize tokens in parser [#19978](https://github.com/apache/datafusion/pull/19978) (askalt) +- Adjust `case_when DivideByZeroProtection` benchmark so that "percentage of zeroes" corresponds to "number of times protection is needed" 
[#20105](https://github.com/apache/datafusion/pull/20105) (pepijnve) +- refactor: Rename `FileSource::try_reverse_output` to `FileSource::try_pushdown_sort` [#20043](https://github.com/apache/datafusion/pull/20043) (kumarUjjawal) +- Improve memory accounting for ArrowBytesViewMap [#20077](https://github.com/apache/datafusion/pull/20077) (vigneshsiva11) +- chore: reduce production noise by using `debug` macro [#19885](https://github.com/apache/datafusion/pull/19885) (Standing-Man) +- chore(deps): bump taiki-e/install-action from 2.67.13 to 2.67.18 [#20124](https://github.com/apache/datafusion/pull/20124) (dependabot[bot]) +- chore(deps): bump actions/setup-node from 4 to 6 [#20125](https://github.com/apache/datafusion/pull/20125) (dependabot[bot]) +- chore(deps): bump tonic from 0.14.2 to 0.14.3 [#20127](https://github.com/apache/datafusion/pull/20127) (dependabot[bot]) +- chore(deps): bump insta from 1.46.1 to 1.46.3 [#20129](https://github.com/apache/datafusion/pull/20129) (dependabot[bot]) +- chore(deps): bump flate2 from 1.1.8 to 1.1.9 [#20130](https://github.com/apache/datafusion/pull/20130) (dependabot[bot]) +- chore(deps): bump clap from 4.5.54 to 4.5.56 [#20131](https://github.com/apache/datafusion/pull/20131) (dependabot[bot]) +- Add BufferExec execution plan [#19760](https://github.com/apache/datafusion/pull/19760) (gabotechs) +- Optimize the evaluation of date_part() == when pushed down [#19733](https://github.com/apache/datafusion/pull/19733) (sdf-jkl) +- chore(deps): bump bytes from 1.11.0 to 1.11.1 [#20141](https://github.com/apache/datafusion/pull/20141) (dependabot[bot]) +- Make session state builder clonable [#20136](https://github.com/apache/datafusion/pull/20136) (askalt) +- chore: remove datatype check functions in favour of upstream versions [#20104](https://github.com/apache/datafusion/pull/20104) (Jefffrey) +- Add Decimal support for floor preimage [#20099](https://github.com/apache/datafusion/pull/20099) (devanshu0987) +- Add more struct 
pushdown tests and planning benchmark [#20143](https://github.com/apache/datafusion/pull/20143) (adriangb) +- Add RepartitionExec test to projection_pushdown.slt [#20156](https://github.com/apache/datafusion/pull/20156) (adriangb) +- chore: Fix typos in comments [#20157](https://github.com/apache/datafusion/pull/20157) (neilconway) +- Fix `array_repeat` handling of null count values [#20102](https://github.com/apache/datafusion/pull/20102) (lyne7-sc) +- Refactor schema rewriter: remove lifetimes, extract column/cast helpers, add mismatch coverage [#20166](https://github.com/apache/datafusion/pull/20166) (kosiew) +- chore(deps): bump time from 0.3.44 to 0.3.47 [#20172](https://github.com/apache/datafusion/pull/20172) (dependabot[bot]) +- chore(deps-dev): bump webpack from 5.94.0 to 5.105.0 in /datafusion/wasmtest/datafusion-wasm-app [#20178](https://github.com/apache/datafusion/pull/20178) (dependabot[bot]) +- Fix Arrow Spill Underrun [#20159](https://github.com/apache/datafusion/pull/20159) (cetra3) +- nom parser instead of ad-hoc in examples [#20122](https://github.com/apache/datafusion/pull/20122) (cj-zhukov) +- fix(datafusion-cli): solve row count bug adding `saturating_add` to prevent potential overflow [#20185](https://github.com/apache/datafusion/pull/20185) (dariocurr) +- Enable inlist support for preimage [#20051](https://github.com/apache/datafusion/pull/20051) (sdf-jkl) +- unify the prettier versions [#20167](https://github.com/apache/datafusion/pull/20167) (cj-zhukov) +- chore: Unbreak doctest CI [#20218](https://github.com/apache/datafusion/pull/20218) (neilconway) +- Minor: verify plan output and unique field names [#20220](https://github.com/apache/datafusion/pull/20220) (alamb) +- Add more tests to projection_pushdown.slt [#20236](https://github.com/apache/datafusion/pull/20236) (adriangb) +- Add Expr::Alias passthrough to Expr::placement() [#20237](https://github.com/apache/datafusion/pull/20237) (adriangb) +- Make PushDownFilter and 
CommonSubexprEliminate aware of Expr::placement [#20239](https://github.com/apache/datafusion/pull/20239) (adriangb) +- Refactor example metadata parsing utilities (#20204) [#20233](https://github.com/apache/datafusion/pull/20233) (cj-zhukov) +- add module structure and unit tests for expression pushdown logical optimizer [#20238](https://github.com/apache/datafusion/pull/20238) (adriangb) +- repro and disable dyn filter for preserve file partitions [#20175](https://github.com/apache/datafusion/pull/20175) (gene-bordegaray) +- chore(deps): bump taiki-e/install-action from 2.67.18 to 2.67.27 [#20254](https://github.com/apache/datafusion/pull/20254) (dependabot[bot]) +- chore(deps): bump sysinfo from 0.38.0 to 0.38.1 [#20261](https://github.com/apache/datafusion/pull/20261) (dependabot[bot]) +- chore(deps): bump clap from 4.5.56 to 4.5.57 [#20265](https://github.com/apache/datafusion/pull/20265) (dependabot[bot]) +- chore(deps): bump tempfile from 3.24.0 to 3.25.0 [#20262](https://github.com/apache/datafusion/pull/20262) (dependabot[bot]) +- chore(deps): bump regex from 1.12.2 to 1.12.3 [#20260](https://github.com/apache/datafusion/pull/20260) (dependabot[bot]) +- chore(deps): bump criterion from 0.8.1 to 0.8.2 [#20258](https://github.com/apache/datafusion/pull/20258) (dependabot[bot]) +- chore(deps): bump regex-syntax from 0.8.8 to 0.8.9 [#20264](https://github.com/apache/datafusion/pull/20264) (dependabot[bot]) +- chore(deps): bump aws-config from 1.8.12 to 1.8.13 [#20263](https://github.com/apache/datafusion/pull/20263) (dependabot[bot]) +- chore(deps): bump async-compression from 0.4.37 to 0.4.39 [#20259](https://github.com/apache/datafusion/pull/20259) (dependabot[bot]) +- Support JSON arrays reader/parse for datafusion [#19924](https://github.com/apache/datafusion/pull/19924) (zhuqi-lucas) +- chore: Add confirmation before tarball is released [#20207](https://github.com/apache/datafusion/pull/20207) (milenkovicm) +- FilterExec should remap indices of parent 
dynamic filters [#20286](https://github.com/apache/datafusion/pull/20286) (jackkleeman) +- Clean up expression placement UDF usage in tests [#20272](https://github.com/apache/datafusion/pull/20272) (adriangb) +- chore(deps): bump the arrow-parquet group with 7 updates [#20256](https://github.com/apache/datafusion/pull/20256) (dependabot[bot]) +- Cleanup example metadata parsing utilities (#20251) [#20252](https://github.com/apache/datafusion/pull/20252) (cj-zhukov) +- Add `StructArray` and `RunArray` benchmark tests to `with_hashes` [#20182](https://github.com/apache/datafusion/pull/20182) (notashes) +- Add protoc support for ArrowScanExecNode (#20280) [#20284](https://github.com/apache/datafusion/pull/20284) (JoshElkind) +- Improve ExternalSorter ResourcesExhausted Error Message [#20226](https://github.com/apache/datafusion/pull/20226) (erenavsarogullari) +- Introduce ProjectionExprs::unproject_exprs/project_exprs and improve docs [#20193](https://github.com/apache/datafusion/pull/20193) (alamb) +- chore: Remove "extern crate criterion" in benches [#20299](https://github.com/apache/datafusion/pull/20299) (neilconway) +- Support pushing down empty projections into joins [#20191](https://github.com/apache/datafusion/pull/20191) (jackkleeman) +- chore: change width_bucket buckets parameter from i32 to i64 [#20330](https://github.com/apache/datafusion/pull/20330) (comphead) +- fix null handling for `nanvl` & implement fast path [#20205](https://github.com/apache/datafusion/pull/20205) (kumarUjjawal) +- unify the prettier version adv (#20024) [#20311](https://github.com/apache/datafusion/pull/20311) (cj-zhukov) +- chore: Make memchr a workspace dependency [#20345](https://github.com/apache/datafusion/pull/20345) (neilconway) +- feat(datafusion-cli): enhance CLI helper with default hint [#20310](https://github.com/apache/datafusion/pull/20310) (dariocurr) +- Adds support for ANSI mode in negative function [#20189](https://github.com/apache/datafusion/pull/20189) 
(SubhamSinghal) +- Support parent dynamic filters for more join types [#20192](https://github.com/apache/datafusion/pull/20192) (jackkleeman) +- Fix incorrect `SortExec` removal before `AggregateExec` (option 2) [#20247](https://github.com/apache/datafusion/pull/20247) (alamb) +- Fix `try_shrink` not freeing back to pool [#20382](https://github.com/apache/datafusion/pull/20382) (cetra3) +- chore(deps): bump sysinfo from 0.38.1 to 0.38.2 [#20411](https://github.com/apache/datafusion/pull/20411) (dependabot[bot]) +- chore(deps): bump indicatif from 0.18.3 to 0.18.4 [#20410](https://github.com/apache/datafusion/pull/20410) (dependabot[bot]) +- chore(deps): bump liblzma from 0.4.5 to 0.4.6 [#20409](https://github.com/apache/datafusion/pull/20409) (dependabot[bot]) +- chore(deps): bump aws-config from 1.8.13 to 1.8.14 [#20407](https://github.com/apache/datafusion/pull/20407) (dependabot[bot]) +- chore(deps): bump tonic from 0.14.3 to 0.14.4 [#20406](https://github.com/apache/datafusion/pull/20406) (dependabot[bot]) +- chore(deps): bump clap from 4.5.57 to 4.5.59 [#20404](https://github.com/apache/datafusion/pull/20404) (dependabot[bot]) +- chore(deps): bump sqllogictest from 0.29.0 to 0.29.1 [#20405](https://github.com/apache/datafusion/pull/20405) (dependabot[bot]) +- chore(deps): bump env_logger from 0.11.8 to 0.11.9 [#20402](https://github.com/apache/datafusion/pull/20402) (dependabot[bot]) +- chore(deps): bump actions/stale from 10.1.1 to 10.2.0 [#20397](https://github.com/apache/datafusion/pull/20397) (dependabot[bot]) +- chore(deps): bump uuid from 1.20.0 to 1.21.0 [#20401](https://github.com/apache/datafusion/pull/20401) (dependabot[bot]) +- [Minor] Update object_store to 0.12.5 [#20378](https://github.com/apache/datafusion/pull/20378) (Dandandan) +- chore(deps): bump syn from 2.0.114 to 2.0.116 [#20399](https://github.com/apache/datafusion/pull/20399) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.67.27 to 2.68.0 
[#20398](https://github.com/apache/datafusion/pull/20398) (dependabot[bot]) +- chore: Cleanup returning null arrays [#20423](https://github.com/apache/datafusion/pull/20423) (neilconway) +- chore: fix labeler for `datafusion-functions-nested` [#20442](https://github.com/apache/datafusion/pull/20442) (comphead) +- build: update Rust toolchain version from 1.92.0 to 1.93.0 in `rust-toolchain.toml` [#20309](https://github.com/apache/datafusion/pull/20309) (dariocurr) +- chore: Cleanup "!is_valid(i)" -> "is_null(i)" [#20453](https://github.com/apache/datafusion/pull/20453) (neilconway) +- refactor: Extract sort-merge join filter logic into separate module [#19614](https://github.com/apache/datafusion/pull/19614) (viirya) +- Implement FFI table provider factory [#20326](https://github.com/apache/datafusion/pull/20326) (davisp) +- bench: Add criterion benchmark for sort merge join [#20464](https://github.com/apache/datafusion/pull/20464) (andygrove) +- chore: group minor dependencies into single PR [#20457](https://github.com/apache/datafusion/pull/20457) (comphead) +- chore(deps): bump taiki-e/install-action from 2.68.0 to 2.68.6 [#20467](https://github.com/apache/datafusion/pull/20467) (dependabot[bot]) +- chore(deps): bump astral-sh/setup-uv from 6.1.0 to 7.3.0 [#20468](https://github.com/apache/datafusion/pull/20468) (dependabot[bot]) +- chore(deps): bump the all-other-cargo-deps group with 6 updates [#20470](https://github.com/apache/datafusion/pull/20470) (dependabot[bot]) +- chore(deps): bump testcontainers-modules from 0.14.0 to 0.15.0 [#20471](https://github.com/apache/datafusion/pull/20471) (dependabot[bot]) +- [Minor] Use buffer_unordered [#20462](https://github.com/apache/datafusion/pull/20462) (Dandandan) +- bench: Add IN list benchmarks for non-constant list expressions [#20444](https://github.com/apache/datafusion/pull/20444) (zhangxffff) +- feat(memory-tracking): implement arrow_buffer::MemoryPool for MemoryPool 
[#18928](https://github.com/apache/datafusion/pull/18928) (notfilippo) +- chore: Avoid build fails on MinIO rate limits [#20472](https://github.com/apache/datafusion/pull/20472) (comphead) +- chore: Add end-to-end benchmark for array_agg, code cleanup [#20496](https://github.com/apache/datafusion/pull/20496) (neilconway) +- Upgrade to sqlparser 0.61.0 [#20177](https://github.com/apache/datafusion/pull/20177) (alamb) +- Switch to the latest Mac OS [#20510](https://github.com/apache/datafusion/pull/20510) (blaginin) +- Fix name tracker [#19856](https://github.com/apache/datafusion/pull/19856) (xanderbailey) +- Runs-on for extended CI checks [#20511](https://github.com/apache/datafusion/pull/20511) (blaginin) +- chore(deps): bump strum from 0.27.2 to 0.28.0 [#20520](https://github.com/apache/datafusion/pull/20520) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.68.6 to 2.68.8 [#20518](https://github.com/apache/datafusion/pull/20518) (dependabot[bot]) +- chore(deps): bump the all-other-cargo-deps group with 2 updates [#20519](https://github.com/apache/datafusion/pull/20519) (dependabot[bot]) +- Make `custom_file_casts` example schema nullable to allow null `id` values during casting [#20486](https://github.com/apache/datafusion/pull/20486) (kosiew) +- Add support for FFI config extensions [#19469](https://github.com/apache/datafusion/pull/19469) (timsaucer) +- chore: Cleanup code to use `repeat_n` in a few places [#20527](https://github.com/apache/datafusion/pull/20527) (neilconway) +- chore(deps): bump strum_macros from 0.27.2 to 0.28.0 [#20521](https://github.com/apache/datafusion/pull/20521) (dependabot[bot]) +- chore: Replace `matches!` on fieldless enums with `==` [#20525](https://github.com/apache/datafusion/pull/20525) (neilconway) +- Update comments on OptimizerRule about function name matching [#20346](https://github.com/apache/datafusion/pull/20346) (alamb) +- Fix incorrect regex pattern in regex_replace_posix_groups 
[#19827](https://github.com/apache/datafusion/pull/19827) (GaneshPatil7517) +- Improve `HashJoinExecBuilder` to save state from previous fields [#20276](https://github.com/apache/datafusion/pull/20276) (askalt) +- [Minor] Fix error messages for `shrink` and `try_shrink` [#20422](https://github.com/apache/datafusion/pull/20422) (hareshkh) +- Fix physical expr adapter to resolve physical fields by name, not column index [#20485](https://github.com/apache/datafusion/pull/20485) (kosiew) +- [fix] Add type coercion from NULL to Interval to make date_bin more postgres compatible [#20499](https://github.com/apache/datafusion/pull/20499) (LiaCastaneda) +- Clamp early aggregation emit to the sort boundary when using partial group ordering [#20446](https://github.com/apache/datafusion/pull/20446) (jackkleeman) +- Split `push_down_filter.slt` into standalone sqllogictest files to reduce long-tail runtime [#20566](https://github.com/apache/datafusion/pull/20566) (kosiew) +- Add deterministic per-file timing summary to sqllogictest runner [#20569](https://github.com/apache/datafusion/pull/20569) (kosiew) +- chore: Enable workspace lint for all workspace members [#20577](https://github.com/apache/datafusion/pull/20577) (neilconway) +- Fix serde of window lead/lag defaults [#20608](https://github.com/apache/datafusion/pull/20608) (avantgardnerio) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 73 dependabot[bot] + 35 Neil Conway + 31 Kumar Ujjawal + 27 Andrew Lamb + 26 Adrian Garcia Badaracco + 21 Jeffrey Vo + 13 cht42 + 10 Albert Skalt + 10 kosiew + 10 lyne + 8 Nuno Faria + 7 Sergey Zhukov + 7 xudong.w + 6 Daniël Heres + 5 Adam Gutglick + 5 Gabriel + 5 Oleks V + 4 Andy Grove + 4 Dmitrii Blaginin + 4 Huaijin + 4 Jack Kleeman + 4 Jonathan Chen + 4 Yongting You + 4 notashes + 4 theirix + 3 Eren Avsarogullari + 3 Kazantsev Maksim + 3 Kosta Tarasov + 3 Liang-Chi Hsieh + 3 Lía Adriana + 3 Tim Saucer + 3 Yu-Chuan Hung + 3 dario curreri + 3 feniljain + 3 mishop-15 + 2 Acfboy + 2 Alan Tang + 2 Devanshu + 2 Frederic Branczyk + 2 Ganesh Patil + 2 Miao + 2 Michael Kleen + 2 Pepijn Van Eeckhoudt + 2 Peter L + 2 Subham Singhal + 2 Tobias Schwarzinger + 2 UBarney + 2 Yuvraj + 2 Zhang Xiaofeng + 2 jizezhang + 2 niebayes + 1 Andrea Bozzo + 1 Andrew Kane + 1 Anjali Choudhary + 1 Anna-Rose Lescure + 1 Aryan Anand + 1 Aviral Garg + 1 Bert Vermeiren + 1 Brent Gardner + 1 ChanTsune + 1 David López + 1 Dewey Dunnington + 1 Divyansh Pratap Singh + 1 Eesh Sagar Singh + 1 Emil Ernerfeldt + 1 Emily Matheys + 1 Eric Chang + 1 Evangeli Silva + 1 Filippo + 1 Gabriel Ferraté + 1 Gene Bordegaray + 1 Geoffrey Claude + 1 Goksel Kabadayi + 1 Haresh Khanna + 1 Heran Lin + 1 Josh Elkind + 1 Marko Milenković + 1 Mason + 1 Mikhail Zabaluev + 1 Mohit rao + 1 Nathaniel J. Smith + 1 Nick + 1 Oleg V. Kozlyuk + 1 Paul J. Davis + 1 Pierre Lacave + 1 Qi Zhu + 1 Raz Luvaton + 1 Rosai + 1 Ruihang Xia + 1 Sergio Esteves + 1 Simon Vandel Sillesen + 1 Siyuan Huang + 1 Tim-53 + 1 Tushar Das + 1 Vignesh + 1 XL Liang + 1 Xander + 1 Xiangpeng Hao + 1 comphead + 1 danielhumanmod + 1 discord9 + 1 hsiang-c + 1 iamthinh + 1 karuppuchamysuresh + 1 pmallex +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
diff --git a/docs/source/download.md b/docs/source/download.md index 3be76a6acf7b4..ed8fc06440f0c 100644 --- a/docs/source/download.md +++ b/docs/source/download.md @@ -26,7 +26,7 @@ For example: ```toml [dependencies] -datafusion = "52.1.0" +datafusion = "53.0.0" ``` While DataFusion is distributed via [crates.io] as a convenience, the diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index e48f0a7c92276..11a1a8a2d6831 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -101,7 +101,7 @@ The following configuration settings are available: | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes | | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 52.1.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 53.0.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.statistics_truncate_length | 64 | (writing) Sets statistics truncate length. 
If NULL, uses default parquet writer setting | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | diff --git a/docs/source/user-guide/crate-configuration.md b/docs/source/user-guide/crate-configuration.md index 44b4d39839c5b..2acb2140efcbf 100644 --- a/docs/source/user-guide/crate-configuration.md +++ b/docs/source/user-guide/crate-configuration.md @@ -156,7 +156,7 @@ By default, Datafusion returns errors as a plain text message. You can enable mo such as backtraces by enabling the `backtrace` feature to your `Cargo.toml` file like this: ```toml -datafusion = { version = "52.1.0", features = ["backtrace"]} +datafusion = { version = "53.0.0", features = ["backtrace"]} ``` Set environment [variables](https://doc.rust-lang.org/std/backtrace/index.html#environment-variables) diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md index 83ba530d2b3b6..fd755715eec91 100644 --- a/docs/source/user-guide/example-usage.md +++ b/docs/source/user-guide/example-usage.md @@ -29,7 +29,7 @@ Find latest available Datafusion version on [DataFusion's crates.io] page. Add the dependency to your `Cargo.toml` file: ```toml -datafusion = "52.1.0" +datafusion = "53.0.0" tokio = { version = "1.0", features = ["rt-multi-thread"] } ```