diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index f0a03d9841a9..0b5c78405b10 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -42,7 +42,7 @@ jobs: steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install cargo-audit - uses: taiki-e/install-action@f535147c22906d77695e11cb199e764aa610a4fc # v2.62.46 + uses: taiki-e/install-action@44c6d64aa62cd779e873306675c7a58e86d6d532 # v2.62.49 with: tool: cargo-audit - name: Run audit check diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 4b3c31e6b3b0..9bdcade8eb2e 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -434,7 +434,7 @@ jobs: sudo apt-get update -qq sudo apt-get install -y -qq clang - name: Setup wasm-pack - uses: taiki-e/install-action@f535147c22906d77695e11cb199e764aa610a4fc # v2.62.46 + uses: taiki-e/install-action@44c6d64aa62cd779e873306675c7a58e86d6d532 # v2.62.49 with: tool: wasm-pack - name: Run tests with headless mode @@ -761,7 +761,7 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Install cargo-msrv - uses: taiki-e/install-action@f535147c22906d77695e11cb199e764aa610a4fc # v2.62.46 + uses: taiki-e/install-action@44c6d64aa62cd779e873306675c7a58e86d6d532 # v2.62.49 with: tool: cargo-msrv diff --git a/Cargo.lock b/Cargo.lock index f500265108ff..9cf4d96415c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,9 +83,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -128,9 +128,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.20" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -143,9 +143,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" @@ -203,14 +203,23 @@ dependencies = [ "serde_bytes", "serde_json", "snap", - "strum 0.27.2", - "strum_macros 0.27.2", + "strum", + "strum_macros", "thiserror", "uuid", "xz2", "zstd", ] +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + [[package]] name = "arrayref" version = "0.3.9" @@ -453,7 +462,7 @@ version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "serde", "serde_core", "serde_json", @@ -552,7 +561,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ 
-574,7 +583,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -585,7 +594,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -611,9 +620,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.7" +version = "1.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04b37ddf8d2e9744a0b9c19ce0b78efe4795339a90b66b7bae77987092cd2e69" +checksum = "1856b1b48b65f71a4dd940b1c0931f9a7b646d4a924b9828ffefc1454714668a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -641,9 +650,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.7" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799a1290207254984cb7c05245111bc77958b92a3c9bb449598044b36341cce6" +checksum = "86590e57ea40121d47d3f2e131bfd873dea15d78dc2f4604f4734537ad9e56c4" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -653,9 +662,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b8ff6c09cd57b16da53641caa860168b88c172a5ee163b0288d3d6eea12786" +checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d" dependencies = [ "aws-lc-sys", "zeroize", @@ -663,9 +672,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.31.0" +version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e44d16778acaf6a9ec9899b92cebd65580b83f685446bf2e1f5d3d732f99dcd" +checksum = "107a4e9d9cab9963e04e84bb8dee0e25f2a987f9a8bad5ed054abd439caa8f8c" dependencies = [ "bindgen", "cc", @@ -676,9 +685,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.11" +version = "1.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e1ed337dabcf765ad5f2fb426f13af22d576328aaf09eac8f70953530798ec0" +checksum = "8fe0fd441565b0b318c76e7206c8d1d0b0166b3e986cf30e890b61feb6192045" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -700,9 +709,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.85.0" +version = "1.89.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f2c741e2e439f07b5d1b33155e246742353d82167c785a2ff547275b7e32483" +checksum = "a9c1b1af02288f729e95b72bd17988c009aa72e26dcb59b3200f86d7aea726c9" dependencies = [ "aws-credential-types", "aws-runtime", @@ -722,9 +731,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.87.0" +version = "1.91.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6428ae5686b18c0ee99f6f3c39d94ae3f8b42894cdc35c35d8fb2470e9db2d4c" +checksum = "4e8122301558dc7c6c68e878af918880b82ff41897a60c8c4e18e4dc4d93e9f1" dependencies = [ "aws-credential-types", "aws-runtime", @@ -744,9 +753,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.87.0" +version = "1.91.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5871bec9a79a3e8d928c7788d654f135dde0e71d2dd98089388bab36b37ef607" +checksum = "8f8090151d4d1e971269957b10dbf287bba551ab812e591ce0516b1c73b75d27" dependencies = [ "aws-credential-types", "aws-runtime", @@ -767,9 +776,9 @@ dependencies = 
[ [[package]] name = "aws-sigv4" -version = "1.3.4" +version = "1.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" +checksum = "c35452ec3f001e1f2f6db107b6373f1f48f05ec63ba2c5c9fa91f07dad32af11" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -789,9 +798,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.5" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c" +checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c" dependencies = [ "futures-util", "pin-project-lite", @@ -800,15 +809,16 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.3" +version = "0.62.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" +checksum = "445d5d720c99eed0b4aa674ed00d835d9b1427dd73e04adaf2f94c6b2d6f9fca" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", + "futures-util", "http 0.2.12", "http 1.3.1", "http-body 0.4.6", @@ -820,9 +830,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.1" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147e8eea63a40315d704b97bf9bc9b8c1402ae94f89d5ad6f7550d963309da1b" +checksum = "623254723e8dfd535f566ee7b2381645f8981da086b5c4aa26c0c41582bb1d2c" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -844,27 +854,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.5" +version = "0.61.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaa31b350998e703e9826b2104dd6f63be0508666e1aba88137af060e8944047" +checksum = "2db31f727935fc63c6eeae8b37b438847639ec330a9161ece694efba257e0c54" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393" +checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.7" +version = "0.60.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9" dependencies = [ "aws-smithy-types", "urlencoding", @@ -872,9 +882,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.2" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185" +checksum = "0bbe9d018d646b96c7be063dd07987849862b0e6d07c778aad7d93d1be6c1ef0" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -896,9 +906,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.0" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07f5e0fc8a6b3f2303f331b94504bbf754d85488f402d6f1dd7a6080f99afe56" +checksum = "ec7204f9fd94749a7c53b26da1b961b4ac36bf070ef1e0b94bb09f79d4f6c193" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -913,9 +923,9 @@ 
dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.2" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" +checksum = "25f535879a207fce0db74b679cfc3e91a3159c8144d717d55f5832aea9eef46e" dependencies = [ "base64-simd", "bytes", @@ -936,18 +946,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.10" +version = "0.60.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728" +checksum = "eab77cdd036b11056d2a30a7af7b775789fb024bf216acc13884c6c97752ae56" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.8" +version = "1.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" +checksum = "d79fb68e3d7fe5d4833ea34dc87d2e97d26d3086cb3da660bb6b1f76d98680b6" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -959,9 +969,9 @@ dependencies = [ [[package]] name = "axum" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" +checksum = "8a18ed336352031311f4e0b4dd2ff392d4fbb370777c9d18d7fc9d7359f73871" dependencies = [ "axum-core", "bytes", @@ -975,8 +985,7 @@ dependencies = [ "mime", "percent-encoding", "pin-project-lite", - "rustversion", - "serde", + "serde_core", "sync_wrapper", "tower", "tower-layer", @@ -985,9 +994,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.2" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" +checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" dependencies = [ "bytes", "futures-core", @@ -996,7 +1005,6 @@ dependencies = [ "http-body-util", "mime", "pin-project-lite", - "rustversion", "sync_wrapper", "tower-layer", "tower-service", @@ -1026,9 +1034,9 @@ dependencies = [ [[package]] name = "bigdecimal" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" dependencies = [ "autocfg", "libm", @@ -1044,7 +1052,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -1055,7 +1063,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -1066,9 +1074,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bitvec" @@ -1115,13 +1123,13 @@ dependencies = [ [[package]] name = "bollard" -version = "0.19.3" +version = "0.19.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ec7646ee90964aa59e9f832a67182791396a19a5b1d76eb17599a8310a7e2e09" +checksum = "87a52479c9237eb04047ddb94788c41ca0d26eaff8b697ecfbb4c32f7fdc3b1b" dependencies = [ "async-stream", "base64 0.22.1", - "bitflags 2.9.4", + "bitflags 2.10.0", "bollard-buildkit-proto", "bollard-stubs", "bytes", @@ -1192,9 +1200,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.7.2" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2529c31017402be841eb45892278a6c21a000c0a17643af326c73a73f83f0fb" +checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1" dependencies = [ "bon-macros", "rustversion", @@ -1202,9 +1210,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.7.2" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82020dadcb845a345591863adb65d74fa8dc5c18a0b6d408470e13b7adc7005" +checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" dependencies = [ "darling", "ident_case", @@ -1212,7 +1220,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -1235,7 +1243,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -1261,9 +1269,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ "memchr", "serde", @@ -1355,9 +1363,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.38" +version = "1.2.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80f41ae168f955c12fb8960b057d70d0ca153fb83182b57d86380443527be7e9" +checksum = "35900b6c8d709fb1d854671ae27aeaa9eec2f8b01b364e1619a40da3e6fe2afe" dependencies = [ "find-msvc-tools", "jobserver", @@ -1376,9 +1384,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "cfg_aliases" @@ -1397,7 +1405,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -1461,9 +1469,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.50" +version = "4.5.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" +checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" dependencies = [ "clap_builder", "clap_derive", @@ -1471,9 +1479,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.50" +version = "4.5.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" +checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" dependencies = [ "anstream", "anstyle", @@ -1490,14 +1498,14 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] name = "clap_lex" -version = "0.7.5" +version = "0.7.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "clipboard-win" @@ -1525,13 +1533,12 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", - "unicode-width 0.2.1", + "unicode-segmentation", + "unicode-width 0.2.2", ] [[package]] @@ -1555,8 +1562,8 @@ dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width 0.2.1", - "windows-sys 0.61.0", + "unicode-width 0.2.2", + "windows-sys 0.61.2", ] [[package]] @@ -1662,7 +1669,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.50", + "clap 4.5.51", "criterion-plot", "futures", "itertools 0.13.0", @@ -1740,21 +1747,21 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" dependencies = [ "csv-core", "itoa", "ryu", - "serde", + "serde_core", ] [[package]] name = "csv-core" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" dependencies = [ "memchr", ] @@ -1802,7 +1809,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -1813,7 +1820,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -1832,7 +1839,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-schema", @@ -1904,7 +1911,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion", @@ -1929,7 +1936,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -1952,7 +1959,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -1970,19 +1977,18 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "tokio", ] [[package]] name = "datafusion-cli" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", "aws-config", "aws-credential-types", "chrono", - "clap 4.5.50", + "clap 4.5.51", "ctor", "datafusion", "datafusion-common", @@ -2007,7 +2013,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -2034,7 +2040,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "51.0.0" dependencies = [ "futures", "log", @@ -2043,7 +2049,7 @@ dependencies = [ [[package]] name = 
"datafusion-datasource" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-compression", @@ -2078,7 +2084,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -2101,7 +2107,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "50.3.0" +version = "51.0.0" dependencies = [ "apache-avro", "arrow", @@ -2120,7 +2126,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2141,7 +2147,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2161,7 +2167,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2190,11 +2196,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "51.0.0" [[package]] name = "datafusion-examples" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2228,7 +2234,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2249,7 +2255,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2273,7 +2279,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2284,7 +2290,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "50.3.0" +version = "51.0.0" dependencies = [ "abi_stable", "arrow", @@ -2306,7 +2312,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2338,7 +2344,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2359,7 +2365,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2372,7 +2378,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2395,7 +2401,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2409,7 +2415,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2425,7 +2431,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2433,16 +2439,16 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.3.0" +version = "51.0.0" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2469,7 +2475,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", 
"arrow", @@ -2494,7 +2500,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2507,7 +2513,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2519,7 +2525,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2539,7 +2545,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2575,7 +2581,7 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "chrono", @@ -2611,7 +2617,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2623,7 +2629,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2641,7 +2647,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "51.0.0" dependencies = [ "async-trait", "datafusion-common", @@ -2653,7 +2659,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2673,7 +2679,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2699,14 +2705,14 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", "bigdecimal", "bytes", "chrono", - "clap 4.5.50", + "clap 4.5.51", "datafusion", "datafusion-spark", "datafusion-substrait", @@ -2733,7 +2739,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "50.3.0" +version = "51.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2755,7 +2761,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "50.3.0" +version = "51.0.0" dependencies = [ "chrono", "console_error_panic_hook", @@ -2776,12 +2782,12 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.3" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc" +checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" dependencies = [ "powerfmt", - "serde", + "serde_core", ] [[package]] @@ -2819,7 +2825,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -2830,14 +2836,14 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] name = "doc-comment" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +checksum = "780955b8b195a21ab8e4ac6b60dd1dbdcec1dc6c51c0617964b08c81785e12c9" [[package]] name = "docker_credential" @@ -2886,7 +2892,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -2909,29 +2915,29 @@ checksum = 
"c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" [[package]] name = "enum-ordinalize" -version = "4.3.0" +version = "4.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea0dcfa4e54eeb516fe454635a95753ddd39acda650ce703031c6973e315dd5" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" dependencies = [ "enum-ordinalize-derive", ] [[package]] name = "enum-ordinalize-derive" -version = "4.3.1" +version = "4.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] name = "env_filter" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" dependencies = [ "log", "regex", @@ -2963,7 +2969,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -3056,9 +3062,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" [[package]] name = "fixedbitset" @@ -3068,19 +3074,19 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.2.10" +version = "25.9.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "rustc_version", ] [[package]] name = "flate2" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-rs-sys", @@ -3099,6 +3105,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -3110,9 +3122,9 @@ dependencies = [ [[package]] name = "fs-err" -version = "3.1.2" +version = "3.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f150ffc8782f35521cec2b23727707cb4045706ba3c854e86bef66b3a8cdbd" +checksum = "6ad492b2cf1d89d568a43508ab24f98501fe03f2f31c01e1d0fe7366a71745d2" dependencies = [ "autocfg", ] @@ -3185,7 +3197,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -3251,9 +3263,9 
@@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.7" +version = "0.14.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" dependencies = [ "typenum", "version_check", @@ -3294,9 +3306,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "globset" -version = "0.4.16" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" dependencies = [ "aho-corasick", "bstr", @@ -3361,9 +3373,7 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -3371,6 +3381,11 @@ name = "hashbrown" version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "heck" @@ -3404,11 +3419,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3604,7 +3619,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.62.0", + "windows-core 0.62.2", ] [[package]] @@ -3618,9 +3633,9 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", @@ -3631,9 +3646,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -3644,11 +3659,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", @@ -3659,42 +3673,38 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.1.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", "yoke", "zerofrom", @@ -3754,22 +3764,25 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.18.0" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a646d946d06bedbbc4cac4c218acf4bbf2d87757a784857025f4d447e4e1cd" +checksum = "ade6dfcba0dfb62ad59e59e7241ec8912af34fd29e0e743e3db992bd278e8b65" dependencies = [ "console 0.16.1", "portable-atomic", - "unicode-width 0.2.1", + "unicode-width 0.2.2", "unit-prefix", "web-time", ] [[package]] name = "indoc" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] [[package]] name = "insta" @@ -3811,9 +3824,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -3821,9 +3834,9 @@ dependencies = [ [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" @@ -3851,26 +3864,26 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" dependencies = [ "jiff-static", "log", "portable-atomic", "portable-atomic-util", - "serde", + "serde_core", ] [[package]] name = "jiff-static" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" dependencies = [ "proc-macro2", "quote", - 
"syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -3985,7 +3998,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ "cfg-if", - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -4011,7 +4024,7 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "libc", "redox_syscall", ] @@ -4024,7 +4037,7 @@ checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" dependencies = [ "anstream", "anstyle", - "clap 4.5.50", + "clap 4.5.51", "escape8259", ] @@ -4045,17 +4058,16 @@ checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] @@ -4109,9 +4121,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.5" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "memoffset" @@ -4165,13 +4177,13 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" dependencies = [ "libc", "wasi", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4195,7 +4207,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "cfg_aliases", "libc", @@ -4222,11 +4234,11 @@ dependencies = [ [[package]] name = "nu-ansi-term" -version = "0.50.1" +version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4312,23 +4324,32 @@ dependencies = [ [[package]] name = "objc2-core-foundation" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] name = "objc2-io-kit" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"71c1c64d6120e51cd86033f67176b1cb66780c2efe34dec55176f77befd93c0a" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" dependencies = [ "libc", "objc2-core-foundation", ] +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + [[package]] name = "object_store" version = "0.12.4" @@ -4374,9 +4395,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "once_cell_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oorandom" @@ -4413,15 +4434,15 @@ checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" [[package]] name = "owo-colors" -version = "4.2.2" +version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dd4f4a2c8405440fd0462561f0e5806bd0f77e86f51c761481bdd4018b545e" +checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" [[package]] name = "parking_lot" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -4429,15 +4450,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link 0.2.1", ] [[package]] @@ -4500,7 +4521,7 @@ dependencies = [ "regex", "regex-syntax", "structmeta", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -4628,7 +4649,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -4701,7 +4722,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -4737,9 +4758,9 @@ dependencies = [ [[package]] name = "potential_utf" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" dependencies = [ "zerovec", ] @@ -4776,7 +4797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -4814,9 +4835,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] @@ 
-4847,7 +4868,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.108", + "syn 2.0.110", "tempfile", ] @@ -4861,7 +4882,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -4884,10 +4905,11 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.26" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" dependencies = [ + "ar_archive_writer", "cc", ] @@ -4956,7 +4978,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -4969,7 +4991,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -5045,9 +5067,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -5180,16 +5202,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] name = "redox_syscall" -version = "0.5.17" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] @@ -5205,22 +5227,22 @@ dependencies = [ [[package]] name = "ref-cast" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -5248,23 +5270,23 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30" +checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "regress" -version = "0.10.4" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145bb27393fe455dd64d6cbc8d059adfa392590a45eadf079c01b11857e7b010" +checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" dependencies = [ - 
"hashbrown 0.15.5", + "hashbrown 0.16.0", "memchr", ] @@ -5294,9 +5316,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.23" +version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" +checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ "base64 0.22.1", "bytes", @@ -5402,7 +5424,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.108", + "syn 2.0.110", "unicode-ident", ] @@ -5414,14 +5436,14 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14" dependencies = [ "quote", "rand 0.8.5", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] name = "rust_decimal" -version = "1.38.0" +version = "1.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8975fc98059f365204d635119cf9c5a60ae67b841ed49b5422a9a7e56cdfac0" +checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282" dependencies = [ "arrayvec", "borsh", @@ -5455,18 +5477,18 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.23.32" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd3c25631629d034ce7cd9940adc9d45762d46de2b0f57193c4443b92c6d4d40" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "aws-lc-rs", "log", @@ -5480,9 +5502,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -5501,9 +5523,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a" dependencies = [ "web-time", "zeroize", @@ -5511,9 +5533,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.6" +version = "0.103.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8572f3c2cb9934231157b45499fc41e1f58c589fdfb81a844ba873265e80f8eb" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" dependencies = [ "aws-lc-rs", "ring", @@ -5533,7 +5555,7 @@ version = "17.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "clipboard-win", "fd-lock", @@ -5544,7 +5566,7 @@ dependencies = [ "nix", "radix_trie", "unicode-segmentation", - "unicode-width 0.2.1", + "unicode-width 0.2.2", "utf8parse", "windows-sys 0.60.2", ] @@ -5570,7 +5592,7 @@ version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" dependencies = [ - "windows-sys 
0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -5599,9 +5621,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.0.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" dependencies = [ "dyn-clone", "ref-cast", @@ -5618,7 +5640,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -5635,11 +5657,11 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "security-framework" -version = "3.5.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc198e42d9b7510827939c9a15f5062a0c913f3371d765977e586d2fe6c16f4a" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "core-foundation", "core-foundation-sys", "libc", @@ -5709,7 +5731,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -5720,7 +5742,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -5744,7 +5766,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -5756,7 +5778,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -5773,9 +5795,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.14.1" +version = "3.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c522100790450cf78eeac1507263d0a350d4d5b30df0c8e1fe051a10c22b376e" +checksum = "aa66c845eee442168b2c8134fec70ac50dc20e760769c8ba0ad1319ca1959b04" dependencies = [ "base64 0.22.1", "chrono", @@ -5783,9 +5805,8 @@ dependencies = [ "indexmap 1.9.3", "indexmap 2.12.0", "schemars 0.9.0", - "schemars 1.0.4", - "serde", - "serde_derive", + "schemars 1.1.0", + "serde_core", "serde_json", "serde_with_macros", "time", @@ -5793,14 +5814,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.14.1" +version = "3.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327ada00f7d64abaac1e55a6911e90cf665aa051b9a561c7006c157f4633135e" +checksum = "b91a903660542fced4e99881aa481bdbaec1634568ee02e0b8bd57c64cb38955" dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -5924,12 +5945,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -5976,20 +5997,20 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] name = "stable_deref_trait" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" dependencies = [ "cc", "cfg-if", @@ -6024,7 +6045,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -6035,7 +6056,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -6062,31 +6083,12 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.108", -] - [[package]] name = "strum_macros" version = "0.27.2" @@ -6096,7 +6098,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -6130,7 +6132,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.108", + "syn 2.0.110", "typify", "walkdir", ] @@ -6154,9 +6156,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.108" +version = "2.0.110" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" dependencies = [ "proc-macro2", "quote", @@ -6180,7 +6182,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -6219,7 +6221,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -6297,7 +6299,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -6362,9 +6364,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -6409,7 +6411,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -6420,14 +6422,14 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] name = "tokio-postgres" -version = "0.7.14" +version = "0.7.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156efe7fff213168257853e1dfde202eed5f487522cbbbf7d219941d753d853" +checksum = "2b40d66d9b2cfe04b628173409368e58247e8eddbbd3b0e6c6ba1d09f20f6c9e" dependencies = [ "async-trait", "byteorder", @@ -6451,9 +6453,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.3" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f63835928ca123f1bef57abbcd23bb2ba0ac9ae1235f1e65bda0d06e7786bd" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ "rustls", "tokio", @@ -6472,9 +6474,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -6485,18 +6487,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f1085dec27c2b6632b04c80b3bb1b4300d6495d1e129693bdda7d91e72eec1" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.23.6" +version = "0.23.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" dependencies = [ "indexmap 2.12.0", "toml_datetime", @@ -6506,9 +6508,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cf893c33be71572e0e9aa6dd15e6677937abd686b066eac3f8cd3531688a627" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" dependencies = [ "winnow", ] @@ -6578,7 +6580,7 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "bytes", "futures-util", "http 1.3.1", @@ -6621,7 +6623,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -6694,9 +6696,9 @@ checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" [[package]] name = "typenum" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "typewit" @@ -6729,7 +6731,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.108", + "syn 2.0.110", "thiserror", "unicode-ident", ] @@ -6747,7 +6749,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.108", + "syn 2.0.110", "typify-impl", ] @@ -6769,24 +6771,24 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-normalization" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" dependencies = [ "tinyvec", ] [[package]] name = "unicode-properties" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" @@ -6802,9 +6804,9 @@ checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-width" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "unindent" @@ -6832,15 +6834,14 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "3.1.2" +version = "3.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ba1025f18a4a3fc3e9b48c868e9beb4f24f4b4b1a325bada26bd4119f46537" +checksum = "d39cb1dbab692d82a977c0392ffac19e188bd9186a9f32806f0aaa859d75585a" dependencies = [ "base64 0.22.1", "log", "percent-encoding", "rustls", - "rustls-pemfile", "rustls-pki-types", "ureq-proto", "utf-8", @@ -7010,7 +7011,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", "wasm-bindgen-shared", ] @@ -7044,7 +7045,7 @@ checksum = "085b2df989e1e6f9620c1311df6c996e83fe16f57792b272ce1e024ac16a90f1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -7082,9 +7083,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" +checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" dependencies = [ "rustls-pki-types", ] @@ -7122,7 +7123,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -7168,15 +7169,15 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.62.0" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57fe7168f7de578d2d8a05b07fd61870d2e73b4020e9f49aa00da8471723497c" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.2.0", - "windows-result 0.4.0", - "windows-strings 0.5.0", + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", ] [[package]] @@ -7192,24 +7193,24 @@ dependencies = [ [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -7220,9 +7221,9 @@ checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-link" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-numerics" @@ -7245,11 +7246,11 @@ dependencies = [ [[package]] name = "windows-result" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -7263,11 +7264,11 @@ dependencies = [ [[package]] name = "windows-strings" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -7294,16 +7295,16 @@ version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.53.3", + "windows-targets 0.53.5", ] [[package]] name = "windows-sys" -version = "0.61.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e201184e40b2ede64bc2ea34968b28e33622acdbbf37104f0e4a33f7abe657aa" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -7324,19 +7325,19 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.53.3" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link 0.1.3", - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows-link 0.2.1", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -7356,9 +7357,9 @@ checksum = 
"32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -7368,9 +7369,9 @@ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_aarch64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -7380,9 +7381,9 @@ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" [[package]] name = "windows_i686_gnullvm" @@ -7392,9 +7393,9 @@ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -7404,9 +7405,9 @@ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_i686_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -7416,9 +7417,9 @@ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -7428,9 +7429,9 @@ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -7440,9 +7441,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "windows_x86_64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" @@ -7461,9 +7462,9 @@ checksum = 
"f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "wyz" @@ -7507,11 +7508,10 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -7519,13 +7519,13 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", "synstructure", ] @@ -7546,7 +7546,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] @@ -7566,21 +7566,21 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", "synstructure", ] [[package]] name = "zeroize" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", "yoke", @@ -7589,9 +7589,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -7600,13 +7600,13 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.108", + "syn 2.0.110", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index f15929b4c2b0..36198430e40b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.88.0" # Define DataFusion version -version = "50.3.0" +version = "51.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -111,43 +111,43 @@ chrono = 
{ version = "0.4.42", default-features = false } criterion = "0.7" ctor = "0.6.1" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "50.3.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "50.3.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "50.3.0" } -datafusion-common = { path = "datafusion/common", version = "50.3.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "50.3.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "50.3.0", default-features = false } -datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "50.3.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "50.3.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "50.3.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "50.3.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "50.3.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "50.3.0" } -datafusion-execution = { path = "datafusion/execution", version = "50.3.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "50.3.0", default-features = false } -datafusion-expr-common = { path = "datafusion/expr-common", version = "50.3.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "50.3.0" } -datafusion-functions = { path = "datafusion/functions", version = "50.3.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "50.3.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "50.3.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "50.3.0", default-features = false } -datafusion-functions-table = { path = "datafusion/functions-table", version = "50.3.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "50.3.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "50.3.0" } -datafusion-macros = { path = "datafusion/macros", version = "50.3.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "50.3.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "50.3.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "50.3.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "50.3.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "50.3.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "50.3.0" } -datafusion-proto = { path = "datafusion/proto", version = "50.3.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "50.3.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "50.3.0" } -datafusion-session = { path = "datafusion/session", version = "50.3.0" } -datafusion-spark = { path = "datafusion/spark", version = "50.3.0" } -datafusion-sql = { path = "datafusion/sql", version = "50.3.0" } -datafusion-substrait = { path = 
"datafusion/substrait", version = "50.3.0" } +datafusion = { path = "datafusion/core", version = "51.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "51.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "51.0.0" } +datafusion-common = { path = "datafusion/common", version = "51.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "51.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "51.0.0", default-features = false } +datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "51.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "51.0.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "51.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "51.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "51.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "51.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "51.0.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "51.0.0", default-features = false } +datafusion-expr-common = { path = "datafusion/expr-common", version = "51.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "51.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "51.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "51.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "51.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "51.0.0", default-features = false } +datafusion-functions-table = { path = "datafusion/functions-table", version = "51.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "51.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "51.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "51.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "51.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "51.0.0", default-features = false } +datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "51.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "51.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "51.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "51.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "51.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "51.0.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "51.0.0" } +datafusion-session = { path = "datafusion/session", version = "51.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "51.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "51.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "51.0.0" } doc-comment = "0.3" env_logger = 
"0.11" diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index d079a88a6440..d95a22aa9537 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -153,7 +153,7 @@ pub async fn exec_from_repl( } } else { eprintln!( - "'\\{}' is not a valid command", + "'\\{}' is not a valid command, you can use '\\?' to see all commands", &line[1..] ); } @@ -168,7 +168,7 @@ pub async fn exec_from_repl( } } } else { - eprintln!("'\\{}' is not a valid command", &line[1..]); + eprintln!("'\\{}' is not a valid command, you can use '\\?' to see all commands", &line[1..]); } } Ok(line) => { diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 38f1f8b0e0ca..61711f8472eb 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -43,10 +43,6 @@ path = "examples/external_dependency/dataframe-to-s3.rs" name = "query_aws_s3" path = "examples/external_dependency/query-aws-s3.rs" -[[example]] -name = "custom_file_casts" -path = "examples/custom_file_casts.rs" - [dev-dependencies] arrow = { workspace = true } # arrow_schema is required for record_batch! macro :sad: diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index f87f62e170af..62e51a790014 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -54,19 +54,20 @@ cargo run --example dataframe - [`analyzer_rule.rs`](examples/analyzer_rule.rs): Use a custom AnalyzerRule to change a query's semantics (row level access control) - [`catalog.rs`](examples/catalog.rs): Register the table into a custom catalog - [`composed_extension_codec`](examples/composed_extension_codec.rs): Example of using multiple extension codecs for serialization / deserialization -- [`csv_sql_streaming.rs`](examples/csv_sql_streaming.rs): Build and run a streaming query plan from a SQL statement against a local CSV file -- [`csv_json_opener.rs`](examples/csv_json_opener.rs): Use low level `FileOpener` APIs to read CSV/JSON into Arrow `RecordBatch`es -- [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource (TableProvider) -- [`custom_file_casts.rs`](examples/custom_file_casts.rs): Implement custom casting rules to adapt file schemas -- [`custom_file_format.rs`](examples/custom_file_format.rs): Write data to a custom file format +- [`examples/custom_data_source/csv_sql_streaming.rs`](examples/custom_data_source/csv_sql_streaming.rs): Build and run a streaming query plan from a SQL statement against a local CSV file +- [`examples/custom_data_source/csv_json_opener.rs`](examples/custom_data_source/csv_json_opener.rs): Use low level `FileOpener` APIs to read CSV/JSON into Arrow `RecordBatch`es +- [`examples/custom_data_source/custom_datasource.rs`](examples/custom_data_source/custom_datasource.rs): Run queries against a custom datasource (TableProvider) +- [`examples/custom_data_source/custom_file_casts.rs`](examples/custom_data_source/custom_file_casts.rs): Implement custom casting rules to adapt file schemas +- [`examples/custom_data_source/custom_file_format.rs`](examples/custom_data_source/custom_file_format.rs): Write data to a custom file format - [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3 - [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data, including multiple subqueries. 
Also demonstrates the various methods to write out a DataFrame to a table, parquet file, csv file, and json file. +- [`examples/builtin_functions/date_time`](examples/builtin_functions/date_time.rs): Examples of date-time related functions and queries - [`default_column_values.rs`](examples/default_column_values.rs): Implement custom default value handling for missing columns using field metadata and PhysicalExprAdapter - [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results (Arrow ArrayRefs) into Rust structs - [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s -- [`file_stream_provider.rs`](examples/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks. +- [`examples/custom_data_source/file_stream_provider.rs`](examples/custom_data_source/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks. - [`flight/sql_server.rs`](examples/flight/sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from Flight and and FlightSQL (e.g. JDBC) clients -- [`function_factory.rs`](examples/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros +- [`examples/builtin_functions/function_factory.rs`](examples/builtin_functions/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros - [`memory_pool_tracking.rs`](examples/memory_pool_tracking.rs): Demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages - [`memory_pool_execution_plan.rs`](examples/memory_pool_execution_plan.rs): Shows how to implement memory-aware ExecutionPlan with memory reservation and spilling - [`optimizer_rule.rs`](examples/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates @@ -81,7 +82,7 @@ cargo run --example dataframe - [`pruning.rs`](examples/pruning.rs): Use pruning to rule out files based on statistics - [`query-aws-s3.rs`](examples/external_dependency/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3 - [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files vi HTTP -- [`regexp.rs`](examples/regexp.rs): Examples of using regular expression functions +- [`examples/builtin_functions/regexp.rs`](examples/builtin_functions/regexp.rs): Examples of using regular expression functions - [`remote_catalog.rs`](examples/regexp.rs): Examples of interfacing with a remote catalog (e.g. over a network) - [`examples/udf/simple_udaf.rs`](examples/udf/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF) - [`examples/udf/simple_udf.rs`](examples/udf/simple_udf.rs): Define and invoke a User Defined Scalar Function (UDF) @@ -91,7 +92,6 @@ cargo run --example dataframe - [`sql_frontend.rs`](examples/sql_frontend.rs): Create LogicalPlans (only) from sql strings - [`sql_dialect.rs`](examples/sql_dialect.rs): Example of implementing a custom SQL dialect on top of `DFParser` - [`sql_query.rs`](examples/memtable.rs): Query data using SQL (in memory `RecordBatches`, local Parquet files) -- [`date_time_function.rs`](examples/date_time_function.rs): Examples of date-time related functions and queries. 
## Distributed diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs index 371c18de354c..67bfc5b1bcf5 100644 --- a/datafusion-examples/examples/advanced_parquet_index.rs +++ b/datafusion-examples/examples/advanced_parquet_index.rs @@ -491,19 +491,18 @@ impl TableProvider for IndexTableProvider { .with_file(indexed_file); let file_source = Arc::new( - ParquetSource::default() + ParquetSource::new(schema.clone()) // provide the predicate so the DataSourceExec can try and prune // row groups internally .with_predicate(predicate) // provide the factory to create parquet reader without re-reading metadata .with_parquet_file_reader_factory(Arc::new(reader_factory)), ); - let file_scan_config = - FileScanConfigBuilder::new(object_store_url, schema, file_source) - .with_limit(limit) - .with_projection_indices(projection.cloned()) - .with_file(partitioned_file) - .build(); + let file_scan_config = FileScanConfigBuilder::new(object_store_url, file_source) + .with_limit(limit) + .with_projection_indices(projection.cloned()) + .with_file(partitioned_file) + .build(); // Finally, put it all together into a DataSourceExec Ok(DataSourceExec::from_data_source(file_scan_config)) diff --git a/datafusion-examples/examples/date_time_functions.rs b/datafusion-examples/examples/builtin_functions/date_time.rs similarity index 97% rename from datafusion-examples/examples/date_time_functions.rs rename to datafusion-examples/examples/builtin_functions/date_time.rs index 2628319ae31f..178cba979cb9 100644 --- a/datafusion-examples/examples/date_time_functions.rs +++ b/datafusion-examples/examples/builtin_functions/date_time.rs @@ -26,8 +26,20 @@ use datafusion::common::assert_contains; use datafusion::error::Result; use datafusion::prelude::*; -#[tokio::main] -async fn main() -> Result<()> { +/// Example: Working with Date and Time Functions +/// +/// This example demonstrates how to work with various date and time +/// functions in DataFusion using both the DataFrame API and SQL queries. +/// +/// It includes: +/// - `make_date`: building `DATE` values from year, month, and day columns +/// - `to_date`: converting string expressions into `DATE` values +/// - `to_timestamp`: parsing strings or numeric values into `TIMESTAMP`s +/// - `to_char`: formatting dates, timestamps, and durations as strings +/// +/// Together, these examples show how to create, convert, and format temporal +/// data using DataFusion’s built-in functions. +pub async fn date_time() -> Result<()> { query_make_date().await?; query_to_date().await?; query_to_timestamp().await?; diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/builtin_functions/function_factory.rs similarity index 99% rename from datafusion-examples/examples/function_factory.rs rename to datafusion-examples/examples/builtin_functions/function_factory.rs index d4312ae59409..5d41e7a26071 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/builtin_functions/function_factory.rs @@ -42,8 +42,7 @@ use std::sync::Arc; /// /// This example is rather simple and does not cover all cases required for a /// real implementation. 
-#[tokio::main] -async fn main() -> Result<()> { +pub async fn function_factory() -> Result<()> { // First we must configure the SessionContext with our function factory let ctx = SessionContext::new() // register custom function factory diff --git a/datafusion-examples/examples/builtin_functions/main.rs b/datafusion-examples/examples/builtin_functions/main.rs new file mode 100644 index 000000000000..c307bc9532bf --- /dev/null +++ b/datafusion-examples/examples/builtin_functions/main.rs @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # These are miscellaneous function-related examples +//! +//! These examples demonstrate miscellaneous function-related features. +//! +//! ## Usage +//! ```bash +//! cargo run --example builtin_functions -- [date_time|function_factory|regexp] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `date_time` — examples of date-time related functions and queries +//! - `function_factory` — register `CREATE FUNCTION` handler to implement SQL macros +//! - `regexp` — examples of using regular expression functions + +mod date_time; +mod function_factory; +mod regexp; + +use std::str::FromStr; + +use datafusion::error::{DataFusionError, Result}; + +enum ExampleKind { + DateTime, + FunctionFactory, + Regexp, +} + +impl AsRef for ExampleKind { + fn as_ref(&self) -> &str { + match self { + Self::DateTime => "date_time", + Self::FunctionFactory => "function_factory", + Self::Regexp => "regexp", + } + } +} + +impl FromStr for ExampleKind { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + match s { + "date_time" => Ok(Self::DateTime), + "function_factory" => Ok(Self::FunctionFactory), + "regexp" => Ok(Self::Regexp), + _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), + } + } +} + +impl ExampleKind { + const ALL: [Self; 3] = [Self::DateTime, Self::FunctionFactory, Self::Regexp]; + + const EXAMPLE_NAME: &str = "builtin_functions"; + + fn variants() -> Vec<&'static str> { + Self::ALL.iter().map(|x| x.as_ref()).collect() + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::variants().join("|") + ); + + let arg = std::env::args().nth(1).ok_or_else(|| { + eprintln!("{usage}"); + DataFusionError::Execution("Missing argument".to_string()) + })?; + + match arg.parse::()? 
{ + ExampleKind::DateTime => date_time::date_time().await?, + ExampleKind::FunctionFactory => function_factory::function_factory().await?, + ExampleKind::Regexp => regexp::regexp().await?, + } + + Ok(()) +} diff --git a/datafusion-examples/examples/regexp.rs b/datafusion-examples/examples/builtin_functions/regexp.rs similarity index 99% rename from datafusion-examples/examples/regexp.rs rename to datafusion-examples/examples/builtin_functions/regexp.rs index 12d115b9b502..13c078693028 100644 --- a/datafusion-examples/examples/regexp.rs +++ b/datafusion-examples/examples/builtin_functions/regexp.rs @@ -28,12 +28,11 @@ use datafusion::prelude::*; /// /// Supported flags can be found at /// https://docs.rs/regex/latest/regex/#grouping-and-flags -#[tokio::main] -async fn main() -> Result<()> { +pub async fn regexp() -> Result<()> { let ctx = SessionContext::new(); ctx.register_csv( "examples", - "../../datafusion/physical-expr/tests/data/regex.csv", + "datafusion/physical-expr/tests/data/regex.csv", CsvReadOptions::new(), ) .await?; diff --git a/datafusion-examples/examples/csv_json_opener.rs b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs similarity index 90% rename from datafusion-examples/examples/csv_json_opener.rs rename to datafusion-examples/examples/custom_data_source/csv_json_opener.rs index ef2a3eaca0c8..4205bbcdf86a 100644 --- a/datafusion-examples/examples/csv_json_opener.rs +++ b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema}; +use datafusion::common::config::CsvOptions; use datafusion::{ assert_batches_eq, datasource::{ @@ -31,9 +32,7 @@ use datafusion::{ test_util::aggr_test_schema, }; -use datafusion::datasource::{ - physical_plan::FileScanConfigBuilder, table_schema::TableSchema, -}; +use datafusion::datasource::physical_plan::FileScanConfigBuilder; use futures::StreamExt; use object_store::{local::LocalFileSystem, memory::InMemory, ObjectStore}; @@ -41,8 +40,7 @@ use object_store::{local::LocalFileSystem, memory::InMemory, ObjectStore}; /// read data from (CSV/JSON) into Arrow RecordBatches. 
/// /// If you want to query data in CSV or JSON files, see the [`dataframe.rs`] and [`sql_query.rs`] examples -#[tokio::main] -async fn main() -> Result<()> { +pub async fn csv_json_opener() -> Result<()> { csv_opener().await?; json_opener().await?; Ok(()) @@ -57,19 +55,25 @@ async fn csv_opener() -> Result<()> { let path = std::path::Path::new(&path).canonicalize()?; + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let scan_config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - Arc::clone(&schema), - Arc::new(CsvSource::default()), + Arc::new(CsvSource::new(Arc::clone(&schema)).with_csv_options(options.clone())), ) .with_projection_indices(Some(vec![12, 0])) .with_limit(Some(5)) .with_file(PartitionedFile::new(path.display().to_string(), 10)) .build(); - let config = CsvSource::new(true, b',', b'"') + let config = CsvSource::new(Arc::clone(&schema)) + .with_csv_options(options) .with_comment(Some(b'#')) - .with_schema(TableSchema::from_file_schema(schema)) .with_batch_size(8192) .with_projection(&scan_config); @@ -125,8 +129,7 @@ async fn json_opener() -> Result<()> { let scan_config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - schema, - Arc::new(JsonSource::default()), + Arc::new(JsonSource::new(schema)), ) .with_projection_indices(Some(vec![1, 0])) .with_limit(Some(5)) diff --git a/datafusion-examples/examples/csv_sql_streaming.rs b/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs similarity index 98% rename from datafusion-examples/examples/csv_sql_streaming.rs rename to datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs index 99264bbcb486..aca63c4f35c2 100644 --- a/datafusion-examples/examples/csv_sql_streaming.rs +++ b/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs @@ -21,8 +21,7 @@ use datafusion::prelude::*; /// This example demonstrates executing a simple query against an Arrow data source (CSV) and /// fetching results with streaming aggregation and streaming window -#[tokio::main] -async fn main() -> Result<()> { +pub async fn csv_sql_streaming() -> Result<()> { // create local execution context let ctx = SessionContext::new(); diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_data_source/custom_datasource.rs similarity index 99% rename from datafusion-examples/examples/custom_datasource.rs rename to datafusion-examples/examples/custom_data_source/custom_datasource.rs index bc865fac5a33..2213d50fccda 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_data_source/custom_datasource.rs @@ -42,8 +42,7 @@ use datafusion::catalog::Session; use tokio::time::timeout; /// This example demonstrates executing a simple query against a custom datasource -#[tokio::main] -async fn main() -> Result<()> { +pub async fn custom_datasource() -> Result<()> { // create our custom datasource and adding some users let db = CustomDataSource::default(); db.populate_users(); diff --git a/datafusion-examples/examples/custom_file_casts.rs b/datafusion-examples/examples/custom_data_source/custom_file_casts.rs similarity index 99% rename from datafusion-examples/examples/custom_file_casts.rs rename to datafusion-examples/examples/custom_data_source/custom_file_casts.rs index 4d97ecd91dc6..31ec2845c611 100644 --- a/datafusion-examples/examples/custom_file_casts.rs +++ 
b/datafusion-examples/examples/custom_data_source/custom_file_casts.rs @@ -44,9 +44,7 @@ use object_store::{ObjectStore, PutPayload}; // This example enforces that casts must be strictly widening: if the file type is Int64 and the table type is Int32, it will error // before even reading the data. // Without this custom cast rule DataFusion would happily do the narrowing cast, potentially erroring only if it found a row with data it could not cast. - -#[tokio::main] -async fn main() -> Result<()> { +pub async fn custom_file_casts() -> Result<()> { println!("=== Creating example data ==="); // Create a logical / table schema with an Int32 column diff --git a/datafusion-examples/examples/custom_file_format.rs b/datafusion-examples/examples/custom_data_source/custom_file_format.rs similarity index 95% rename from datafusion-examples/examples/custom_file_format.rs rename to datafusion-examples/examples/custom_data_source/custom_file_format.rs index 67fe642fd46e..510fa53c593f 100644 --- a/datafusion-examples/examples/custom_file_format.rs +++ b/datafusion-examples/examples/custom_data_source/custom_file_format.rs @@ -30,6 +30,7 @@ use datafusion::{ FileFormat, FileFormatFactory, }, physical_plan::{FileScanConfig, FileSinkConfig, FileSource}, + table_schema::TableSchema, MemTable, }, error::Result, @@ -47,6 +48,42 @@ use tempfile::tempdir; /// TSVFileFormatFactory is responsible for creating instances of TSVFileFormat. /// The former, once registered with the SessionState, will then be used /// to facilitate SQL operations on TSV files, such as `COPY TO` shown here. +pub async fn custom_file_format() -> Result<()> { + // Create a new context with the default configuration + let mut state = SessionStateBuilder::new().with_default_features().build(); + + // Register the custom file format + let file_format = Arc::new(TSVFileFactory::new()); + state.register_file_format(file_format, true)?; + + // Create a new context with the custom file format + let ctx = SessionContext::new_with_state(state); + + let mem_table = create_mem_table(); + ctx.register_table("mem_table", mem_table)?; + + let temp_dir = tempdir().unwrap(); + let table_save_path = temp_dir.path().join("mem_table.tsv"); + + let d = ctx + .sql(&format!( + "COPY mem_table TO '{}' STORED AS TSV;", + table_save_path.display(), + )) + .await?; + + let results = d.collect().await?; + println!( + "Number of inserted rows: {:?}", + (results[0] + .column_by_name("count") + .unwrap() + .as_primitive::() + .value(0)) + ); + + Ok(()) +} #[derive(Debug)] /// Custom file format that reads and writes TSV files @@ -128,8 +165,8 @@ impl FileFormat for TSVFileFormat { .await } - fn file_source(&self) -> Arc { - self.csv_file_format.file_source() + fn file_source(&self, table_schema: TableSchema) -> Arc { + self.csv_file_format.file_source(table_schema) } } @@ -180,44 +217,6 @@ impl GetExt for TSVFileFactory { } } -#[tokio::main] -async fn main() -> Result<()> { - // Create a new context with the default configuration - let mut state = SessionStateBuilder::new().with_default_features().build(); - - // Register the custom file format - let file_format = Arc::new(TSVFileFactory::new()); - state.register_file_format(file_format, true).unwrap(); - - // Create a new context with the custom file format - let ctx = SessionContext::new_with_state(state); - - let mem_table = create_mem_table(); - ctx.register_table("mem_table", mem_table).unwrap(); - - let temp_dir = tempdir().unwrap(); - let table_save_path = temp_dir.path().join("mem_table.tsv"); - - let d = ctx 
- .sql(&format!( - "COPY mem_table TO '{}' STORED AS TSV;", - table_save_path.display(), - )) - .await?; - - let results = d.collect().await?; - println!( - "Number of inserted rows: {:?}", - (results[0] - .column_by_name("count") - .unwrap() - .as_primitive::() - .value(0)) - ); - - Ok(()) -} - // create a simple mem table fn create_mem_table() -> Arc { let fields = vec![ diff --git a/datafusion-examples/examples/file_stream_provider.rs b/datafusion-examples/examples/custom_data_source/file_stream_provider.rs similarity index 91% rename from datafusion-examples/examples/file_stream_provider.rs rename to datafusion-examples/examples/custom_data_source/file_stream_provider.rs index e6c59d57e98d..55d2cc8cc0af 100644 --- a/datafusion-examples/examples/file_stream_provider.rs +++ b/datafusion-examples/examples/custom_data_source/file_stream_provider.rs @@ -15,6 +15,29 @@ // specific language governing permissions and limitations // under the License. +/// Demonstrates how to use [`FileStreamProvider`] and [`StreamTable`] to stream data +/// from a file-like source (FIFO) into DataFusion for continuous querying. +/// +/// On non-Windows systems, this example creates a named pipe (FIFO) and +/// writes rows into it asynchronously while DataFusion reads the data +/// through a `FileStreamProvider`. +/// +/// This illustrates how to integrate dynamically updated data sources +/// with DataFusion without needing to reload the entire dataset each time. +/// +/// This example does not work on Windows. +pub async fn file_stream_provider() -> datafusion::error::Result<()> { + #[cfg(target_os = "windows")] + { + println!("file_stream_provider example does not work on windows"); + Ok(()) + } + #[cfg(not(target_os = "windows"))] + { + non_windows::main().await + } +} + #[cfg(not(target_os = "windows"))] mod non_windows { use datafusion::assert_batches_eq; @@ -186,16 +209,3 @@ mod non_windows { Ok(()) } } - -#[tokio::main] -async fn main() -> datafusion::error::Result<()> { - #[cfg(target_os = "windows")] - { - println!("file_stream_provider example does not work on windows"); - Ok(()) - } - #[cfg(not(target_os = "windows"))] - { - non_windows::main().await - } -} diff --git a/datafusion-examples/examples/custom_data_source/main.rs b/datafusion-examples/examples/custom_data_source/main.rs new file mode 100644 index 000000000000..ce0585f8c3f7 --- /dev/null +++ b/datafusion-examples/examples/custom_data_source/main.rs @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # These examples are all related to extending or defining how DataFusion reads data +//! +//! These examples demonstrate how DataFusion reads data. +//! +//! ## Usage +//! ```bash +//! 
cargo run --example custom_data_source -- [csv_json_opener|csv_sql_streaming|custom_datasource|custom_file_casts|custom_file_format|file_stream_provider] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `csv_json_opener` — use low level FileOpener APIs to read CSV/JSON into Arrow RecordBatches +//! - `csv_sql_streaming` — build and run a streaming query plan from a SQL statement against a local CSV file +//! - `custom_datasource` — run queries against a custom datasource (TableProvider) +//! - `custom_file_casts` — implement custom casting rules to adapt file schemas +//! - `custom_file_format` — write data to a custom file format +//! - `file_stream_provider` — run a query on FileStreamProvider which implements StreamProvider for reading and writing to arbitrary stream sources/sinks + +mod csv_json_opener; +mod csv_sql_streaming; +mod custom_datasource; +mod custom_file_casts; +mod custom_file_format; +mod file_stream_provider; + +use std::str::FromStr; + +use datafusion::error::{DataFusionError, Result}; + +enum ExampleKind { + CsvJsonOpener, + CsvSqlStreaming, + CustomDatasource, + CustomFileCasts, + CustomFileFormat, + FileFtreamProvider, +} + +impl AsRef for ExampleKind { + fn as_ref(&self) -> &str { + match self { + Self::CsvJsonOpener => "csv_json_opener", + Self::CsvSqlStreaming => "csv_sql_streaming", + Self::CustomDatasource => "custom_datasource", + Self::CustomFileCasts => "custom_file_casts", + Self::CustomFileFormat => "custom_file_format", + Self::FileFtreamProvider => "file_stream_provider", + } + } +} + +impl FromStr for ExampleKind { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + match s { + "csv_json_opener" => Ok(Self::CsvJsonOpener), + "csv_sql_streaming" => Ok(Self::CsvSqlStreaming), + "custom_datasource" => Ok(Self::CustomDatasource), + "custom_file_casts" => Ok(Self::CustomFileCasts), + "custom_file_format" => Ok(Self::CustomFileFormat), + "file_stream_provider" => Ok(Self::FileFtreamProvider), + _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), + } + } +} + +impl ExampleKind { + const ALL: [Self; 6] = [ + Self::CsvJsonOpener, + Self::CsvSqlStreaming, + Self::CustomDatasource, + Self::CustomFileCasts, + Self::CustomFileFormat, + Self::FileFtreamProvider, + ]; + + const EXAMPLE_NAME: &str = "custom_data_source"; + + fn variants() -> Vec<&'static str> { + Self::ALL.iter().map(|x| x.as_ref()).collect() + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::variants().join("|") + ); + + let arg = std::env::args().nth(1).ok_or_else(|| { + eprintln!("{usage}"); + DataFusionError::Execution("Missing argument".to_string()) + })?; + + match arg.parse::()? { + ExampleKind::CsvJsonOpener => csv_json_opener::csv_json_opener().await?, + ExampleKind::CsvSqlStreaming => csv_sql_streaming::csv_sql_streaming().await?, + ExampleKind::CustomDatasource => custom_datasource::custom_datasource().await?, + ExampleKind::CustomFileCasts => custom_file_casts::custom_file_casts().await?, + ExampleKind::CustomFileFormat => custom_file_format::custom_file_format().await?, + ExampleKind::FileFtreamProvider => { + file_stream_provider::file_stream_provider().await? 
+ } + } + + Ok(()) +} diff --git a/datafusion-examples/examples/default_column_values.rs b/datafusion-examples/examples/default_column_values.rs index d3a7d2ec67f3..bfc60519f26e 100644 --- a/datafusion-examples/examples/default_column_values.rs +++ b/datafusion-examples/examples/default_column_values.rs @@ -235,7 +235,7 @@ impl TableProvider for DefaultValueTableProvider { &df_schema, )?; - let parquet_source = ParquetSource::default() + let parquet_source = ParquetSource::new(schema.clone()) .with_predicate(filter) .with_pushdown_filters(true); @@ -257,7 +257,6 @@ impl TableProvider for DefaultValueTableProvider { let file_scan_config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("memory://")?, - self.schema.clone(), Arc::new(parquet_source), ) .with_projection_indices(projection.cloned()) diff --git a/datafusion-examples/examples/flight/main.rs b/datafusion-examples/examples/flight/main.rs index a448789b353b..a83b19bac42e 100644 --- a/datafusion-examples/examples/flight/main.rs +++ b/datafusion-examples/examples/flight/main.rs @@ -19,6 +19,11 @@ //! //! These examples demonstrate Arrow Flight usage. //! +//! ## Usage +//! ```bash +//! cargo run --example flight -- [client|server|sql_server] +//! ``` +//! //! Each subcommand runs a corresponding example: //! - `client` — run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol //! - `server` — run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol diff --git a/datafusion-examples/examples/parquet_embedded_index.rs b/datafusion-examples/examples/parquet_embedded_index.rs index 3cbe18914775..bc0e5a072caa 100644 --- a/datafusion-examples/examples/parquet_embedded_index.rs +++ b/datafusion-examples/examples/parquet_embedded_index.rs @@ -426,8 +426,10 @@ impl TableProvider for DistinctIndexTable { // Build ParquetSource to actually read the files let url = ObjectStoreUrl::parse("file://")?; - let source = Arc::new(ParquetSource::default().with_enable_page_index(true)); - let mut builder = FileScanConfigBuilder::new(url, self.schema.clone(), source); + let source = Arc::new( + ParquetSource::new(self.schema.clone()).with_enable_page_index(true), + ); + let mut builder = FileScanConfigBuilder::new(url, source); for file in files_to_scan { let path = self.dir.join(file); let len = std::fs::metadata(&path)?.len(); diff --git a/datafusion-examples/examples/parquet_index.rs b/datafusion-examples/examples/parquet_index.rs index a1dd1f1ffd10..bc9e2a9226d0 100644 --- a/datafusion-examples/examples/parquet_index.rs +++ b/datafusion-examples/examples/parquet_index.rs @@ -242,9 +242,10 @@ impl TableProvider for IndexTableProvider { let files = self.index.get_files(predicate.clone())?; let object_store_url = ObjectStoreUrl::parse("file://")?; - let source = Arc::new(ParquetSource::default().with_predicate(predicate)); + let source = + Arc::new(ParquetSource::new(self.schema()).with_predicate(predicate)); let mut file_scan_config_builder = - FileScanConfigBuilder::new(object_store_url, self.schema(), source) + FileScanConfigBuilder::new(object_store_url, source) .with_projection_indices(projection.cloned()) .with_limit(limit); diff --git a/datafusion-examples/examples/udf/main.rs b/datafusion-examples/examples/udf/main.rs index ba36dbb15c58..104d37393780 100644 --- a/datafusion-examples/examples/udf/main.rs +++ b/datafusion-examples/examples/udf/main.rs @@ -19,6 +19,11 @@ //! //! These examples demonstrate user-defined functions in DataFusion. //! +//! 
## Usage +//! ```bash +//! cargo run --example udf -- [adv_udaf|adv_udf|adv_udwf|async_udf|udaf|udf|udtf|udwf] +//! ``` +//! //! Each subcommand runs a corresponding example: //! - `adv_udaf` — user defined aggregate function example //! - `adv_udf` — user defined scalar function example diff --git a/datafusion/catalog-listing/Cargo.toml b/datafusion/catalog-listing/Cargo.toml index 4b802c0067e5..be1374b37148 100644 --- a/datafusion/catalog-listing/Cargo.toml +++ b/datafusion/catalog-listing/Cargo.toml @@ -46,7 +46,6 @@ futures = { workspace = true } itertools = { workspace = true } log = { workspace = true } object_store = { workspace = true } -tokio = { workspace = true } [dev-dependencies] datafusion-datasource-parquet = { workspace = true } diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 82cc36867939..089457648d21 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -25,12 +25,11 @@ use datafusion_common::internal_err; use datafusion_common::{HashMap, Result, ScalarValue}; use datafusion_datasource::ListingTableUrl; use datafusion_datasource::PartitionedFile; -use datafusion_expr::{BinaryExpr, Operator}; +use datafusion_expr::{lit, utils, BinaryExpr, Operator}; use arrow::{ - array::{Array, ArrayRef, AsArray, StringBuilder}, - compute::{and, cast, prep_null_mask_filter}, - datatypes::{DataType, Field, Fields, Schema}, + array::AsArray, + datatypes::{DataType, Field}, record_batch::RecordBatch, }; use datafusion_expr::execution_props::ExecutionProps; @@ -39,7 +38,7 @@ use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use log::{debug, trace}; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{Column, DFSchema, DataFusionError}; +use datafusion_common::{Column, DFSchema}; use datafusion_expr::{Expr, Volatility}; use datafusion_physical_expr::create_physical_expr; use object_store::path::Path; @@ -239,105 +238,6 @@ pub async fn list_partitions( Ok(out) } -async fn prune_partitions( - table_path: &ListingTableUrl, - partitions: Vec, - filters: &[Expr], - partition_cols: &[(String, DataType)], -) -> Result> { - if filters.is_empty() { - // prune partitions which don't contain the partition columns - return Ok(partitions - .into_iter() - .filter(|p| { - let cols = partition_cols.iter().map(|x| x.0.as_str()); - !parse_partitions_for_path(table_path, &p.path, cols) - .unwrap_or_default() - .is_empty() - }) - .collect()); - } - - let mut builders: Vec<_> = (0..partition_cols.len()) - .map(|_| StringBuilder::with_capacity(partitions.len(), partitions.len() * 10)) - .collect(); - - for partition in &partitions { - let cols = partition_cols.iter().map(|x| x.0.as_str()); - let parsed = parse_partitions_for_path(table_path, &partition.path, cols) - .unwrap_or_default(); - - let mut builders = builders.iter_mut(); - for (p, b) in parsed.iter().zip(&mut builders) { - b.append_value(p); - } - builders.for_each(|b| b.append_null()); - } - - let arrays = partition_cols - .iter() - .zip(builders) - .map(|((_, d), mut builder)| { - let array = builder.finish(); - cast(&array, d) - }) - .collect::>()?; - - let fields: Fields = partition_cols - .iter() - .map(|(n, d)| Field::new(n, d.clone(), true)) - .collect(); - let schema = Arc::new(Schema::new(fields)); - - let df_schema = DFSchema::from_unqualified_fields( - partition_cols - .iter() - .map(|(n, d)| Field::new(n, d.clone(), true)) - .collect(), - Default::default(), - )?; - - let batch = 
RecordBatch::try_new(schema, arrays)?; - - // TODO: Plumb this down - let props = ExecutionProps::new(); - - // Applies `filter` to `batch` returning `None` on error - let do_filter = |filter| -> Result { - let expr = create_physical_expr(filter, &df_schema, &props)?; - expr.evaluate(&batch)?.into_array(partitions.len()) - }; - - //.Compute the conjunction of the filters - let mask = filters - .iter() - .map(|f| do_filter(f).map(|a| a.as_boolean().clone())) - .reduce(|a, b| Ok(and(&a?, &b?)?)); - - let mask = match mask { - Some(Ok(mask)) => mask, - Some(Err(err)) => return Err(err), - None => return Ok(partitions), - }; - - // Don't retain partitions that evaluated to null - let prepared = match mask.null_count() { - 0 => mask, - _ => prep_null_mask_filter(&mask), - }; - - // Sanity check - assert_eq!(prepared.len(), partitions.len()); - - let filtered = partitions - .into_iter() - .zip(prepared.values()) - .filter_map(|(p, f)| f.then_some(p)) - .collect(); - - Ok(filtered) -} - #[derive(Debug)] enum PartitionValue { Single(String), @@ -412,6 +312,62 @@ pub fn evaluate_partition_prefix<'a>( } } +fn filter_partitions( + pf: PartitionedFile, + filters: &[Expr], + df_schema: &DFSchema, +) -> Result> { + if pf.partition_values.is_empty() && !filters.is_empty() { + return Ok(None); + } else if filters.is_empty() { + return Ok(Some(pf)); + } + + let arrays = pf + .partition_values + .iter() + .map(|v| v.to_array()) + .collect::>()?; + + let batch = RecordBatch::try_new(Arc::clone(df_schema.inner()), arrays)?; + + let filter = utils::conjunction(filters.iter().cloned()).unwrap_or_else(|| lit(true)); + let props = ExecutionProps::new(); + let expr = create_physical_expr(&filter, df_schema, &props)?; + + // Since we're only operating on a single file, our batch and resulting "array" holds only one + // value indicating if the input file matches the provided filters + let matches = expr.evaluate(&batch)?.into_array(1)?; + if matches.as_boolean().value(0) { + return Ok(Some(pf)); + } + + Ok(None) +} + +fn try_into_partitioned_file( + object_meta: ObjectMeta, + partition_cols: &[(String, DataType)], + table_path: &ListingTableUrl, +) -> Result { + let cols = partition_cols.iter().map(|(name, _)| name.as_str()); + let parsed = parse_partitions_for_path(table_path, &object_meta.location, cols); + + let partition_values = parsed + .into_iter() + .flatten() + .zip(partition_cols) + .map(|(parsed, (_, datatype))| { + ScalarValue::try_from_string(parsed.to_string(), datatype) + }) + .collect::>>()?; + + let mut pf: PartitionedFile = object_meta.into(); + pf.partition_values = partition_values; + + Ok(pf) +} + /// Discover the partitions on the given path and prune out files /// that belong to irrelevant partitions using `filters` expressions. /// `filters` should only contain expressions that can be evaluated @@ -424,7 +380,11 @@ pub async fn pruned_partition_list<'a>( file_extension: &'a str, partition_cols: &'a [(String, DataType)], ) -> Result>> { - // if no partition col => simply list all the files + let objects = table_path + .list_all_files(ctx, store, file_extension) + .await? + .try_filter(|object_meta| futures::future::ready(object_meta.size > 0)); + if partition_cols.is_empty() { if !filters.is_empty() { return internal_err!( @@ -432,72 +392,29 @@ pub async fn pruned_partition_list<'a>( table_path ); } - return Ok(Box::pin( - table_path - .list_all_files(ctx, store, file_extension) - .await? 
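Read on its own, the `filter_partitions` helper added above reduces partition pruning to three steps: turn one file's partition values into a single-row `RecordBatch`, fold the pruning filters into one predicate with `utils::conjunction`, and evaluate that predicate. A minimal standalone sketch of the same idea, using the APIs that appear in this hunk; the `date` column and the sample values are invented for illustration:

```rust
use std::sync::Arc;

use arrow::array::AsArray;
use arrow::datatypes::{DataType, Field};
use arrow::record_batch::RecordBatch;
use datafusion_common::{DFSchema, Result, ScalarValue};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::{col, lit, utils, Expr};
use datafusion_physical_expr::create_physical_expr;

/// Returns true if a file with the given partition values passes the
/// conjunction of the pruning filters.
fn partition_matches(
    partition_values: &[ScalarValue],
    partition_cols: &[(String, DataType)],
    filters: &[Expr],
) -> Result<bool> {
    // Schema and one-row batch describing just this file's partition columns.
    let df_schema = DFSchema::from_unqualified_fields(
        partition_cols
            .iter()
            .map(|(n, d)| Field::new(n, d.clone(), true))
            .collect(),
        Default::default(),
    )?;
    let arrays = partition_values
        .iter()
        .map(|v| v.to_array())
        .collect::<Result<Vec<_>>>()?;
    let batch = RecordBatch::try_new(Arc::clone(df_schema.inner()), arrays)?;

    // Fold all filters into a single predicate and evaluate it on the one-row batch.
    let predicate =
        utils::conjunction(filters.iter().cloned()).unwrap_or_else(|| lit(true));
    let physical = create_physical_expr(&predicate, &df_schema, &ExecutionProps::new())?;
    let matches = physical.evaluate(&batch)?.into_array(1)?;
    Ok(matches.as_boolean().value(0))
}

fn main() -> Result<()> {
    let cols = vec![("date".to_string(), DataType::Utf8)];
    let filters = vec![col("date").eq(lit("2021-10-26"))];
    assert!(partition_matches(&[ScalarValue::from("2021-10-26")], &cols, &filters)?);
    assert!(!partition_matches(&[ScalarValue::from("2021-10-27")], &cols, &filters)?);
    Ok(())
}
```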
- .try_filter(|object_meta| futures::future::ready(object_meta.size > 0)) - .map_ok(|object_meta| object_meta.into()), - )); - } - - let partition_prefix = evaluate_partition_prefix(partition_cols, filters); - - let partitions = - list_partitions(store, table_path, partition_cols.len(), partition_prefix) - .await?; - debug!("Listed {} partitions", partitions.len()); - let pruned = - prune_partitions(table_path, partitions, filters, partition_cols).await?; - - debug!("Pruning yielded {} partitions", pruned.len()); - - let stream = futures::stream::iter(pruned) - .map(move |partition: Partition| async move { - let cols = partition_cols.iter().map(|x| x.0.as_str()); - let parsed = parse_partitions_for_path(table_path, &partition.path, cols); + // if no partition col => simply list all the files + Ok(objects.map_ok(|object_meta| object_meta.into()).boxed()) + } else { + let df_schema = DFSchema::from_unqualified_fields( + partition_cols + .iter() + .map(|(n, d)| Field::new(n, d.clone(), true)) + .collect(), + Default::default(), + )?; - let partition_values = parsed - .into_iter() - .flatten() - .zip(partition_cols) - .map(|(parsed, (_, datatype))| { - ScalarValue::try_from_string(parsed.to_string(), datatype) - }) - .collect::>>()?; - - let files = match partition.files { - Some(files) => files, - None => { - trace!("Recursively listing partition {}", partition.path); - store.list(Some(&partition.path)).try_collect().await? - } - }; - let files = files.into_iter().filter(move |o| { - let extension_match = o.location.as_ref().ends_with(file_extension); - // here need to scan subdirectories(`listing_table_ignore_subdirectory` = false) - let glob_match = table_path.contains(&o.location, false); - extension_match && glob_match - }); - - let stream = futures::stream::iter(files.map(move |object_meta| { - Ok(PartitionedFile { - object_meta, - partition_values: partition_values.clone(), - range: None, - statistics: None, - extensions: None, - metadata_size_hint: None, - }) - })); - - Ok::<_, DataFusionError>(stream) - }) - .buffer_unordered(CONCURRENCY_LIMIT) - .try_flatten() - .boxed(); - Ok(stream) + Ok(objects + .map_ok(|object_meta| { + try_into_partitioned_file(object_meta, partition_cols, table_path) + }) + .try_filter_map(move |pf| { + futures::future::ready( + pf.and_then(|pf| filter_partitions(pf, filters, &df_schema)), + ) + }) + .boxed()) + } } /// Extract the partition values for the given `file_path` (in the given `table_path`) @@ -541,22 +458,11 @@ pub fn describe_partition(partition: &Partition) -> (&str, usize, Vec<&str>) { #[cfg(test)] mod tests { - use async_trait::async_trait; - use datafusion_common::config::TableOptions; use datafusion_datasource::file_groups::FileGroup; - use datafusion_execution::config::SessionConfig; - use datafusion_execution::runtime_env::RuntimeEnv; - use futures::FutureExt; - use object_store::memory::InMemory; - use std::any::Any; use std::ops::Not; use super::*; - use datafusion_expr::{ - case, col, lit, AggregateUDF, Expr, LogicalPlan, ScalarUDF, WindowUDF, - }; - use datafusion_physical_expr_common::physical_expr::PhysicalExpr; - use datafusion_physical_plan::ExecutionPlan; + use datafusion_expr::{case, col, lit, Expr}; #[test] fn test_split_files() { @@ -599,209 +505,6 @@ mod tests { assert_eq!(0, chunks.len()); } - #[tokio::test] - async fn test_pruned_partition_list_empty() { - let (store, state) = make_test_store_and_state(&[ - ("tablepath/mypartition=val1/notparquetfile", 100), - ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), - 
("tablepath/file.parquet", 100), - ("tablepath/notapartition/file.parquet", 100), - ("tablepath/notmypartition=val1/file.parquet", 100), - ]); - let filter = Expr::eq(col("mypartition"), lit("val1")); - let pruned = pruned_partition_list( - state.as_ref(), - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - &[filter], - ".parquet", - &[(String::from("mypartition"), DataType::Utf8)], - ) - .await - .expect("partition pruning failed") - .collect::>() - .await; - - assert_eq!(pruned.len(), 0); - } - - #[tokio::test] - async fn test_pruned_partition_list() { - let (store, state) = make_test_store_and_state(&[ - ("tablepath/mypartition=val1/file.parquet", 100), - ("tablepath/mypartition=val2/file.parquet", 100), - ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), - ("tablepath/mypartition=val1/other=val3/file.parquet", 100), - ("tablepath/notapartition/file.parquet", 100), - ("tablepath/notmypartition=val1/file.parquet", 100), - ]); - let filter = Expr::eq(col("mypartition"), lit("val1")); - let pruned = pruned_partition_list( - state.as_ref(), - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - &[filter], - ".parquet", - &[(String::from("mypartition"), DataType::Utf8)], - ) - .await - .expect("partition pruning failed") - .try_collect::>() - .await - .unwrap(); - - assert_eq!(pruned.len(), 2); - let f1 = &pruned[0]; - assert_eq!( - f1.object_meta.location.as_ref(), - "tablepath/mypartition=val1/file.parquet" - ); - assert_eq!(&f1.partition_values, &[ScalarValue::from("val1")]); - let f2 = &pruned[1]; - assert_eq!( - f2.object_meta.location.as_ref(), - "tablepath/mypartition=val1/other=val3/file.parquet" - ); - assert_eq!(f2.partition_values, &[ScalarValue::from("val1"),]); - } - - #[tokio::test] - async fn test_pruned_partition_list_multi() { - let (store, state) = make_test_store_and_state(&[ - ("tablepath/part1=p1v1/file.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), - ("tablepath/part1=p1v3/part2=p2v1/file2.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v2/file2.parquet", 100), - ]); - let filter1 = Expr::eq(col("part1"), lit("p1v2")); - let filter2 = Expr::eq(col("part2"), lit("p2v1")); - let pruned = pruned_partition_list( - state.as_ref(), - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - &[filter1, filter2], - ".parquet", - &[ - (String::from("part1"), DataType::Utf8), - (String::from("part2"), DataType::Utf8), - ], - ) - .await - .expect("partition pruning failed") - .try_collect::>() - .await - .unwrap(); - - assert_eq!(pruned.len(), 2); - let f1 = &pruned[0]; - assert_eq!( - f1.object_meta.location.as_ref(), - "tablepath/part1=p1v2/part2=p2v1/file1.parquet" - ); - assert_eq!( - &f1.partition_values, - &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1"),] - ); - let f2 = &pruned[1]; - assert_eq!( - f2.object_meta.location.as_ref(), - "tablepath/part1=p1v2/part2=p2v1/file2.parquet" - ); - assert_eq!( - &f2.partition_values, - &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1")] - ); - } - - #[tokio::test] - async fn test_list_partition() { - let (store, _) = make_test_store_and_state(&[ - ("tablepath/part1=p1v1/file.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), - ("tablepath/part1=p1v3/part2=p2v1/file3.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v2/file4.parquet", 100), - 
("tablepath/part1=p1v2/part2=p2v2/empty.parquet", 0), - ]); - - let partitions = list_partitions( - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - 0, - None, - ) - .await - .expect("listing partitions failed"); - - assert_eq!( - &partitions - .iter() - .map(describe_partition) - .collect::>(), - &vec![ - ("tablepath", 0, vec![]), - ("tablepath/part1=p1v1", 1, vec![]), - ("tablepath/part1=p1v2", 1, vec![]), - ("tablepath/part1=p1v3", 1, vec![]), - ] - ); - - let partitions = list_partitions( - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - 1, - None, - ) - .await - .expect("listing partitions failed"); - - assert_eq!( - &partitions - .iter() - .map(describe_partition) - .collect::>(), - &vec![ - ("tablepath", 0, vec![]), - ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), - ("tablepath/part1=p1v2", 1, vec![]), - ("tablepath/part1=p1v2/part2=p2v1", 2, vec![]), - ("tablepath/part1=p1v2/part2=p2v2", 2, vec![]), - ("tablepath/part1=p1v3", 1, vec![]), - ("tablepath/part1=p1v3/part2=p2v1", 2, vec![]), - ] - ); - - let partitions = list_partitions( - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - 2, - None, - ) - .await - .expect("listing partitions failed"); - - assert_eq!( - &partitions - .iter() - .map(describe_partition) - .collect::>(), - &vec![ - ("tablepath", 0, vec![]), - ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), - ("tablepath/part1=p1v2", 1, vec![]), - ("tablepath/part1=p1v3", 1, vec![]), - ( - "tablepath/part1=p1v2/part2=p2v1", - 2, - vec!["file1.parquet", "file2.parquet"] - ), - ("tablepath/part1=p1v2/part2=p2v2", 2, vec!["file4.parquet"]), - ("tablepath/part1=p1v3/part2=p2v1", 2, vec!["file3.parquet"]), - ] - ); - } - #[test] fn test_parse_partitions_for_path() { assert_eq!( @@ -1016,86 +719,4 @@ mod tests { Some(Path::from("a=1970-01-05")), ); } - - pub fn make_test_store_and_state( - files: &[(&str, u64)], - ) -> (Arc, Arc) { - let memory = InMemory::new(); - - for (name, size) in files { - memory - .put(&Path::from(*name), vec![0; *size as usize].into()) - .now_or_never() - .unwrap() - .unwrap(); - } - - (Arc::new(memory), Arc::new(MockSession {})) - } - - struct MockSession {} - - #[async_trait] - impl Session for MockSession { - fn session_id(&self) -> &str { - unimplemented!() - } - - fn config(&self) -> &SessionConfig { - unimplemented!() - } - - async fn create_physical_plan( - &self, - _logical_plan: &LogicalPlan, - ) -> Result> { - unimplemented!() - } - - fn create_physical_expr( - &self, - _expr: Expr, - _df_schema: &DFSchema, - ) -> Result> { - unimplemented!() - } - - fn scalar_functions(&self) -> &std::collections::HashMap> { - unimplemented!() - } - - fn aggregate_functions( - &self, - ) -> &std::collections::HashMap> { - unimplemented!() - } - - fn window_functions(&self) -> &std::collections::HashMap> { - unimplemented!() - } - - fn runtime_env(&self) -> &Arc { - unimplemented!() - } - - fn execution_props(&self) -> &ExecutionProps { - unimplemented!() - } - - fn as_any(&self) -> &dyn Any { - unimplemented!() - } - - fn table_options(&self) -> &TableOptions { - unimplemented!() - } - - fn table_options_mut(&mut self) -> &mut TableOptions { - unimplemented!() - } - - fn task_ctx(&self) -> Arc { - unimplemented!() - } - } } diff --git a/datafusion/catalog-listing/src/mod.rs b/datafusion/catalog-listing/src/mod.rs index 90d04b46b806..1e06483261d2 100644 --- a/datafusion/catalog-listing/src/mod.rs +++ b/datafusion/catalog-listing/src/mod.rs @@ -15,6 +15,9 @@ // specific 
language governing permissions and limitations // under the License. +// https://github.com/apache/datafusion/issues/18503 +#![deny(clippy::needless_pass_by_value)] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] #![doc( html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg", html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg" diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index 95f9523d4401..33d5c86bf88d 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -34,7 +34,7 @@ use datafusion_datasource::schema_adapter::{ DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, }; use datafusion_datasource::{ - compute_all_files_statistics, ListingTableUrl, PartitionedFile, + compute_all_files_statistics, ListingTableUrl, PartitionedFile, TableSchema, }; use datafusion_execution::cache::cache_manager::FileStatisticsCache; use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; @@ -338,7 +338,16 @@ impl ListingTable { fn create_file_source_with_schema_adapter( &self, ) -> datafusion_common::Result> { - let mut source = self.options.format.file_source(); + let table_schema = TableSchema::new( + Arc::clone(&self.file_schema), + self.options + .table_partition_cols + .iter() + .map(|(col, field)| Arc::new(Field::new(col, field.clone(), false))) + .collect(), + ); + + let mut source = self.options.format.file_source(table_schema); // Apply schema adapter to source if available // // The source will use this SchemaAdapter to adapt data batches as they flow up the plan. @@ -418,7 +427,7 @@ impl TableProvider for ListingTable { .options .table_partition_cols .iter() - .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone())) + .map(|col| Ok(Arc::new(self.table_schema.field_with_name(&col.0)?.clone()))) .collect::>>()?; let table_partition_col_names = table_partition_cols @@ -491,20 +500,15 @@ impl TableProvider for ListingTable { .format .create_physical_plan( state, - FileScanConfigBuilder::new( - object_store_url, - Arc::clone(&self.file_schema), - file_source, - ) - .with_file_groups(partitioned_file_lists) - .with_constraints(self.constraints.clone()) - .with_statistics(statistics) - .with_projection_indices(projection) - .with_limit(limit) - .with_output_ordering(output_ordering) - .with_table_partition_cols(table_partition_cols) - .with_expr_adapter(self.expr_adapter_factory.clone()) - .build(), + FileScanConfigBuilder::new(object_store_url, file_source) + .with_file_groups(partitioned_file_lists) + .with_constraints(self.constraints.clone()) + .with_statistics(statistics) + .with_projection_indices(projection) + .with_limit(limit) + .with_output_ordering(output_ordering) + .with_expr_adapter(self.expr_adapter_factory.clone()) + .build(), ) .await?; diff --git a/datafusion/common-runtime/src/lib.rs b/datafusion/common-runtime/src/lib.rs index 5d404d99e776..eb32fead7ecf 100644 --- a/datafusion/common-runtime/src/lib.rs +++ b/datafusion/common-runtime/src/lib.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. 
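The recurring theme in these hunks is that file sources now carry the table schema themselves rather than receiving it through `FileScanConfigBuilder`. A small sketch of the construction pattern, assuming the `TableSchema::new` and `TableSchema::from_file_schema` constructors exactly as they appear in the `ListingTable` hunk above; the column names are invented for illustration:

```rust
use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};
use datafusion_datasource::TableSchema;

fn main() {
    // Columns physically stored in each data file.
    let file_schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int64, false),
        Field::new("name", DataType::Utf8, true),
    ]));

    // Partition columns come from the directory layout (e.g. `/date=2021-10-26/`),
    // not from the files themselves.
    let partition_cols = vec![Arc::new(Field::new("date", DataType::Utf8, false))];

    // File sources are built from this combined description instead of getting
    // the file schema as a separate argument to `FileScanConfigBuilder::new`.
    let table_schema = TableSchema::new(Arc::clone(&file_schema), partition_cols);
    let _ = table_schema;

    // Shorthand used throughout the patch when a table has no partition columns.
    let _unpartitioned = TableSchema::from_file_schema(file_schema);
}
```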
+// https://github.com/apache/datafusion/issues/18503 +#![deny(clippy::needless_pass_by_value)] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] #![doc( html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg", html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg" diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 0ed499da0475..190ebf05bdd3 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -606,6 +606,29 @@ config_namespace! { /// written, it may be necessary to increase this size to avoid errors from /// the remote end point. pub objectstore_writer_buffer_size: usize, default = 10 * 1024 * 1024 + + /// Whether to enable ANSI SQL mode. + /// + /// The flag is experimental and relevant only for DataFusion Spark built-in functions + /// + /// When `enable_ansi_mode` is set to `true`, the query engine follows ANSI SQL + /// semantics for expressions, casting, and error handling. This means: + /// - **Strict type coercion rules:** implicit casts between incompatible types are disallowed. + /// - **Standard SQL arithmetic behavior:** operations such as division by zero, + /// numeric overflow, or invalid casts raise runtime errors rather than returning + /// `NULL` or adjusted values. + /// - **Consistent ANSI behavior** for string concatenation, comparisons, and `NULL` handling. + /// + /// When `enable_ansi_mode` is `false` (the default), the engine uses a more permissive, + /// non-ANSI mode designed for user convenience and backward compatibility. In this mode: + /// - Implicit casts between types are allowed (e.g., string to integer when possible). + /// - Arithmetic operations are more lenient — for example, `abs()` on the minimum + /// representable integer value returns the input value instead of raising overflow. + /// - Division by zero or invalid casts may return `NULL` instead of failing. + /// + /// # Default + /// `false` — ANSI SQL mode is disabled by default. 
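As a quick illustration of the behavior described above, here is a minimal sketch of toggling the flag at runtime; the field declaration itself follows below. It assumes the option is registered under the `datafusion.execution` namespace alongside the surrounding options, so the exact key and field path are assumptions rather than taken from the patch:

```rust
use datafusion_common::config::ConfigOptions;
use datafusion_common::Result;

fn main() -> Result<()> {
    let mut options = ConfigOptions::new();

    // Default is false: permissive, non-ANSI behavior.
    // (Assumes the flag lives in the execution options.)
    assert!(!options.execution.enable_ansi_mode);

    // Opt in to strict ANSI semantics for the Spark-compatible functions.
    options.set("datafusion.execution.enable_ansi_mode", "true")?;
    assert!(options.execution.enable_ansi_mode);

    Ok(())
}
```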
+ pub enable_ansi_mode: bool, default = false } } @@ -1124,6 +1147,15 @@ pub struct ConfigOptions { } impl ConfigField for ConfigOptions { + fn visit(&self, v: &mut V, _key_prefix: &str, _description: &'static str) { + self.catalog.visit(v, "datafusion.catalog", ""); + self.execution.visit(v, "datafusion.execution", ""); + self.optimizer.visit(v, "datafusion.optimizer", ""); + self.explain.visit(v, "datafusion.explain", ""); + self.sql_parser.visit(v, "datafusion.sql_parser", ""); + self.format.visit(v, "datafusion.format", ""); + } + fn set(&mut self, key: &str, value: &str) -> Result<()> { // Extensions are handled in the public `ConfigOptions::set` let (key, rem) = key.split_once('.').unwrap_or((key, "")); @@ -1137,15 +1169,6 @@ impl ConfigField for ConfigOptions { _ => _config_err!("Config value \"{key}\" not found on ConfigOptions"), } } - - fn visit(&self, v: &mut V, _key_prefix: &str, _description: &'static str) { - self.catalog.visit(v, "datafusion.catalog", ""); - self.execution.visit(v, "datafusion.execution", ""); - self.optimizer.visit(v, "datafusion.optimizer", ""); - self.explain.visit(v, "datafusion.explain", ""); - self.sql_parser.visit(v, "datafusion.sql_parser", ""); - self.format.visit(v, "datafusion.format", ""); - } } impl ConfigOptions { diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index fde52944d049..4fa6d28e7324 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -758,6 +758,116 @@ macro_rules! unwrap_or_internal_err { }; } +/// Assert a condition, returning `DataFusionError::Internal` on failure. +/// +/// # Examples +/// +/// ```text +/// assert_or_internal_err!(predicate); +/// assert_or_internal_err!(predicate, "human readable message"); +/// assert_or_internal_err!(predicate, format!("details: {}", value)); +/// ``` +#[macro_export] +macro_rules! assert_or_internal_err { + ($cond:expr) => { + if !$cond { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {}", + stringify!($cond) + ))); + } + }; + ($cond:expr, $($arg:tt)+) => { + if !$cond { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {}: {}", + stringify!($cond), + format!($($arg)+) + ))); + } + }; +} + +/// Assert equality, returning `DataFusionError::Internal` on failure. +/// +/// # Examples +/// +/// ```text +/// assert_eq_or_internal_err!(actual, expected); +/// assert_eq_or_internal_err!(left_expr, right_expr, "values must match"); +/// assert_eq_or_internal_err!(lhs, rhs, "metadata: {}", extra); +/// ``` +#[macro_export] +macro_rules! assert_eq_or_internal_err { + ($left:expr, $right:expr $(,)?) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val != right_val { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {} == {} (left: {:?}, right: {:?})", + stringify!($left), + stringify!($right), + left_val, + right_val + ))); + } + }}; + ($left:expr, $right:expr, $($arg:tt)+) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val != right_val { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {} == {} (left: {:?}, right: {:?}): {}", + stringify!($left), + stringify!($right), + left_val, + right_val, + format!($($arg)+) + ))); + } + }}; +} + +/// Assert inequality, returning `DataFusionError::Internal` on failure. 
+/// +/// # Examples +/// +/// ```text +/// assert_ne_or_internal_err!(left, right); +/// assert_ne_or_internal_err!(lhs_expr, rhs_expr, "values must differ"); +/// assert_ne_or_internal_err!(a, b, "context {}", info); +/// ``` +#[macro_export] +macro_rules! assert_ne_or_internal_err { + ($left:expr, $right:expr $(,)?) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val == right_val { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {} != {} (left: {:?}, right: {:?})", + stringify!($left), + stringify!($right), + left_val, + right_val + ))); + } + }}; + ($left:expr, $right:expr, $($arg:tt)+) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val == right_val { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {} != {} (left: {:?}, right: {:?}): {}", + stringify!($left), + stringify!($right), + left_val, + right_val, + format!($($arg)+) + ))); + } + }}; +} + /// Add a macros for concise DataFusionError::* errors declaration /// supports placeholders the same way as `format!` /// Examples: @@ -974,6 +1084,115 @@ mod test { use std::sync::Arc; use arrow::error::ArrowError; + use insta::assert_snapshot; + + fn ok_result() -> Result<()> { + Ok(()) + } + + #[test] + fn test_assert_eq_or_internal_err_passes() -> Result<()> { + assert_eq_or_internal_err!(1, 1); + ok_result() + } + + #[test] + fn test_assert_eq_or_internal_err_fails() { + fn check() -> Result<()> { + assert_eq_or_internal_err!(1, 2, "expected equality"); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: 1 == 2 (left: 1, right: 2): expected equality. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_ne_or_internal_err_passes() -> Result<()> { + assert_ne_or_internal_err!(1, 2); + ok_result() + } + + #[test] + fn test_assert_ne_or_internal_err_fails() { + fn check() -> Result<()> { + assert_ne_or_internal_err!(3, 3, "values must differ"); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: 3 != 3 (left: 3, right: 3): values must differ. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_or_internal_err_passes() -> Result<()> { + assert_or_internal_err!(true); + assert_or_internal_err!(true, "message"); + ok_result() + } + + #[test] + fn test_assert_or_internal_err_fails_default() { + fn check() -> Result<()> { + assert_or_internal_err!(false); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: false. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_or_internal_err_fails_with_message() { + fn check() -> Result<()> { + assert_or_internal_err!(false, "custom message"); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: false: custom message. + This issue was likely caused by a bug in DataFusion's code. 
Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_or_internal_err_with_format_arguments() { + fn check() -> Result<()> { + assert_or_internal_err!(false, "custom {}", 42); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: false: custom 42. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } #[test] fn test_error_size() { diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 6266a7184cf5..653bfbd51378 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -23,13 +23,17 @@ extern crate datafusion; mod data_utils; use crate::criterion::Criterion; +use arrow::array::PrimitiveArray; use arrow::array::{ArrayRef, RecordBatch}; +use arrow::datatypes::ArrowNativeTypeOp; +use arrow::datatypes::ArrowPrimitiveType; use arrow::datatypes::{DataType, Field, Fields, Schema}; use criterion::Bencher; use datafusion::datasource::MemTable; use datafusion::execution::context::SessionContext; use datafusion_common::{config::Dialect, ScalarValue}; use datafusion_expr::col; +use rand_distr::num_traits::NumCast; use std::hint::black_box; use std::path::PathBuf; use std::sync::Arc; @@ -155,18 +159,30 @@ fn benchmark_with_param_values_many_columns( /// 0,100...9900 /// 0,200...19800 /// 0,300...29700 -fn register_union_order_table(ctx: &SessionContext, num_columns: usize, num_rows: usize) { - // ("c0", [0, 0, ...]) - // ("c1": [100, 200, ...]) - // etc - let iter = (0..num_columns).map(|i| i as u64).map(|i| { - let array: ArrayRef = Arc::new(arrow::array::UInt64Array::from_iter_values( - (0..num_rows) - .map(|j| j as u64 * 100 + i) - .collect::>(), - )); +fn register_union_order_table_generic( + ctx: &SessionContext, + num_columns: usize, + num_rows: usize, +) where + T: ArrowPrimitiveType, + T::Native: ArrowNativeTypeOp + NumCast, +{ + let iter = (0..num_columns).map(|i| { + let array_data: Vec = (0..num_rows) + .map(|j| { + let value = (j as u64) * 100 + (i as u64); + ::from(value).unwrap_or_else(|| { + panic!("Failed to cast numeric value to Native type") + }) + }) + .collect(); + + // Use PrimitiveArray which is generic over the ArrowPrimitiveType T + let array: ArrayRef = Arc::new(PrimitiveArray::::from_iter_values(array_data)); + (format!("c{i}"), array) }); + let batch = RecordBatch::try_from_iter(iter).unwrap(); let schema = batch.schema(); let partitions = vec![vec![batch]]; @@ -183,7 +199,6 @@ fn register_union_order_table(ctx: &SessionContext, num_columns: usize, num_rows ctx.register_table("t", Arc::new(table)).unwrap(); } - /// return a query like /// ```sql /// select c1, 2 as c2, ... n as cn from t ORDER BY c1 @@ -403,13 +418,40 @@ fn criterion_benchmark(c: &mut Criterion) { // -- Sorted Queries -- // 100, 200 && 300 is taking too long - https://github.com/apache/datafusion/issues/18366 + // Logical Plan for datatype Int64 and UInt64 differs, UInt64 Logical Plan's Union are wrapped + // up in Projection, and EliminateNestedUnion OptimezerRule is not applied leading to significantly + // longer execution time. 
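A standalone sketch of the generic column-construction pattern used by `register_union_order_table_generic` above; `Int64Type` and `UInt64Type` stand in for the two benchmark variants, and the helper here is illustrative rather than part of the patch:

```rust
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, PrimitiveArray};
use arrow::datatypes::{ArrowPrimitiveType, Int64Type, UInt64Type};
// Mirrors the benchmark's import of NumCast via rand_distr's num_traits re-export.
use rand_distr::num_traits::NumCast;

/// Build one column of `num_rows` values for column index `i`, generic over
/// the Arrow primitive type.
fn build_column<T>(i: usize, num_rows: usize) -> ArrayRef
where
    T: ArrowPrimitiveType,
    T::Native: NumCast,
{
    let values: Vec<T::Native> = (0..num_rows)
        .map(|j| {
            let v = (j as u64) * 100 + (i as u64);
            <T::Native as NumCast>::from(v).expect("value fits in target type")
        })
        .collect();
    Arc::new(PrimitiveArray::<T>::from_iter_values(values))
}

fn main() {
    // Same data, two physical types: the point of the benchmark split above.
    let signed = build_column::<Int64Type>(1, 3);
    let unsigned = build_column::<UInt64Type>(1, 3);
    assert_eq!(signed.len(), 3);
    assert_eq!(unsigned.len(), 3);
}
```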
+ // https://github.com/apache/datafusion/issues/17261 + for column_count in [10, 50 /* 100, 200, 300 */] { - register_union_order_table(&ctx, column_count, 1000); + register_union_order_table_generic::( + &ctx, + column_count, + 1000, + ); // this query has many expressions in its sort order so stresses // order equivalence validation c.bench_function( - &format!("physical_sorted_union_order_by_{column_count}"), + &format!("physical_sorted_union_order_by_{column_count}_int64"), + |b| { + // SELECT ... UNION ALL ... + let query = union_orderby_query(column_count); + b.iter(|| physical_plan(&ctx, &rt, &query)) + }, + ); + + let _ = ctx.deregister_table("t"); + } + + for column_count in [10, 50 /* 100, 200, 300 */] { + register_union_order_table_generic::( + &ctx, + column_count, + 1000, + ); + c.bench_function( + &format!("physical_sorted_union_order_by_{column_count}_uint64"), |b| { // SELECT ... UNION ALL ... let query = union_orderby_query(column_count); diff --git a/datafusion/core/benches/topk_aggregate.rs b/datafusion/core/benches/topk_aggregate.rs index 9a5fb7163be5..7971293c9ce2 100644 --- a/datafusion/core/benches/topk_aggregate.rs +++ b/datafusion/core/benches/topk_aggregate.rs @@ -46,7 +46,7 @@ async fn create_context( opts.optimizer.enable_topk_aggregation = use_topk; let ctx = SessionContext::new_with_config(cfg); let _ = ctx.register_table("traces", mem_table)?; - let sql = format!("select trace_id, max(timestamp_ms) from traces group by trace_id order by max(timestamp_ms) desc limit {limit};"); + let sql = format!("select max(timestamp_ms) from traces group by trace_id order by max(timestamp_ms) desc limit {limit};"); let df = ctx.sql(sql.as_str()).await?; let physical_plan = df.create_physical_plan().await?; let actual_phys_plan = displayable(physical_plan.as_ref()).indent(true).to_string(); @@ -75,20 +75,20 @@ async fn aggregate( let actual = format!("{}", pretty_format_batches(&batches)?).to_lowercase(); let expected_asc = r#" -+----------------------------------+--------------------------+ -| trace_id | max(traces.timestamp_ms) | -+----------------------------------+--------------------------+ -| 5868861a23ed31355efc5200eb80fe74 | 16909009999999 | -| 4040e64656804c3d77320d7a0e7eb1f0 | 16909009999998 | -| 02801bbe533190a9f8713d75222f445d | 16909009999997 | -| 9e31b3b5a620de32b68fefa5aeea57f1 | 16909009999996 | -| 2d88a860e9bd1cfaa632d8e7caeaa934 | 16909009999995 | -| a47edcef8364ab6f191dd9103e51c171 | 16909009999994 | -| 36a3fa2ccfbf8e00337f0b1254384db6 | 16909009999993 | -| 0756be84f57369012e10de18b57d8a2f | 16909009999992 | -| d4d6bf9845fa5897710e3a8db81d5907 | 16909009999991 | -| 3c2cc1abe728a66b61e14880b53482a0 | 16909009999990 | -+----------------------------------+--------------------------+ ++--------------------------+ +| max(traces.timestamp_ms) | ++--------------------------+ +| 16909009999999 | +| 16909009999998 | +| 16909009999997 | +| 16909009999996 | +| 16909009999995 | +| 16909009999994 | +| 16909009999993 | +| 16909009999992 | +| 16909009999991 | +| 16909009999990 | ++--------------------------+ "# .trim(); if asc { diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 98804e424b40..b43c9c671f7d 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1655,7 +1655,7 @@ impl DataFrame { pub fn into_view(self) -> Arc { Arc::new(DataFrameTableProvider { plan: self.plan, - table_type: TableType::Temporary, + table_type: TableType::View, }) } diff --git 
a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 4881783eeba6..7c55d452c4e1 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -40,6 +40,7 @@ pub(crate) mod test_util { use datafusion_catalog::Session; use datafusion_common::Result; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; + use datafusion_datasource::TableSchema; use datafusion_datasource::{file_format::FileFormat, PartitionedFile}; use datafusion_execution::object_store::ObjectStoreUrl; use std::sync::Arc; @@ -66,6 +67,8 @@ pub(crate) mod test_util { .await? }; + let table_schema = TableSchema::new(file_schema.clone(), vec![]); + let statistics = format .infer_stats(state, &store, file_schema.clone(), &meta) .await?; @@ -85,8 +88,7 @@ pub(crate) mod test_util { state, FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - format.file_source(), + format.file_source(table_schema), ) .with_file_groups(file_groups) .with_statistics(statistics) diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 37b9663111a5..620e389a0fb8 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -124,16 +124,13 @@ mod tests { let f2 = Field::new("extra_column", DataType::Utf8, true); let schema = Arc::new(Schema::new(vec![f1.clone(), f2.clone()])); - let source = ParquetSource::default() + let source = ParquetSource::new(Arc::clone(&schema)) .with_schema_adapter_factory(Arc::new(TestSchemaAdapterFactory {})) .unwrap(); - let base_conf = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - schema, - source, - ) - .with_file(partitioned_file) - .build(); + let base_conf = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file(partitioned_file) + .build(); let parquet_exec = DataSourceExec::from_data_source(base_conf); diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 9068c9758179..1cf8c573acd9 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -34,7 +34,7 @@ mod tests { use datafusion_common::{test_util, Result, ScalarValue}; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; - use datafusion_datasource::PartitionedFile; + use datafusion_datasource::{PartitionedFile, TableSchema}; use datafusion_datasource_avro::source::AvroSource; use datafusion_datasource_avro::AvroFormat; use datafusion_execution::object_store::ObjectStoreUrl; @@ -81,15 +81,11 @@ mod tests { .infer_schema(&state, &store, std::slice::from_ref(&meta)) .await?; - let source = Arc::new(AvroSource::new()); - let conf = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - file_schema, - source, - ) - .with_file(meta.into()) - .with_projection_indices(Some(vec![0, 1, 2])) - .build(); + let source = Arc::new(AvroSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file(meta.into()) + .with_projection_indices(Some(vec![0, 1, 2])) + .build(); let source_exec = DataSourceExec::from_data_source(conf); assert_eq!( @@ -157,8 +153,8 @@ mod tests { // Include the missing column in the projection let projection = Some(vec![0, 1, 2, actual_schema.fields().len()]); - let source = 
Arc::new(AvroSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(AvroSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file(meta.into()) .with_projection_indices(projection) .build(); @@ -227,13 +223,16 @@ mod tests { partitioned_file.partition_values = vec![ScalarValue::from("2021-10-26")]; let projection = Some(vec![0, 1, file_schema.fields().len(), 2]); - let source = Arc::new(AvroSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let table_schema = TableSchema::new( + file_schema.clone(), + vec![Arc::new(Field::new("date", DataType::Utf8, false))], + ); + let source = Arc::new(AvroSource::new(table_schema.clone())); + let conf = FileScanConfigBuilder::new(object_store_url, source) // select specific columns of the files as well as the partitioning // column which is supposed to be the last column in the table schema. .with_projection_indices(projection) .with_file(partitioned_file) - .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)]) .build(); let source_exec = DataSourceExec::from_data_source(conf); diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 4f46a57d8b13..ac5df24d4999 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -29,12 +29,14 @@ mod tests { use std::io::Write; use std::sync::Arc; + use datafusion_datasource::TableSchema; use datafusion_datasource_csv::CsvFormat; use object_store::ObjectStore; use crate::prelude::CsvReadOptions; use crate::prelude::SessionContext; use crate::test::partitioned_file_groups; + use datafusion_common::config::CsvOptions; use datafusion_common::test_util::arrow_test_data; use datafusion_common::test_util::batches_to_string; use datafusion_common::{assert_batches_eq, Result}; @@ -94,6 +96,8 @@ mod tests { async fn csv_exec_with_projection( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; + let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); let file_schema = aggr_test_schema(); @@ -110,16 +114,21 @@ mod tests { tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_file_compression_type(file_compression_type) - .with_newlines_in_values(false) - .with_projection_indices(Some(vec![0, 2, 4])) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_file_compression_type(file_compression_type) + .with_newlines_in_values(false) + .with_projection_indices(Some(vec![0, 2, 4])) + .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); @@ -158,6 +167,8 @@ mod tests { async fn csv_exec_with_mixed_order_projection( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; + let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); 
let session_ctx = SessionContext::new_with_config(cfg); let task_ctx = session_ctx.task_ctx(); @@ -175,16 +186,21 @@ mod tests { tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_projection_indices(Some(vec![4, 0, 2])) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .with_projection_indices(Some(vec![4, 0, 2])) + .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(3, csv.schema().fields().len()); @@ -221,6 +237,7 @@ mod tests { async fn csv_exec_with_limit( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; use futures::StreamExt; let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); @@ -240,16 +257,21 @@ mod tests { tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_limit(Some(5)) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .with_limit(Some(5)) + .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(13, csv.schema().fields().len()); @@ -287,6 +309,8 @@ mod tests { async fn csv_exec_with_missing_column( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; + let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); let file_schema = aggr_test_schema_with_missing_col(); @@ -303,16 +327,21 @@ mod tests { tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_limit(Some(5)) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + 
.with_file_compression_type(file_compression_type.to_owned()) + .with_limit(Some(5)) + .build(); assert_eq!(14, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(14, csv.schema().fields().len()); @@ -341,6 +370,7 @@ mod tests { file_compression_type: FileCompressionType, ) -> Result<()> { use datafusion_common::ScalarValue; + use datafusion_datasource::TableSchema; let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); @@ -362,19 +392,26 @@ mod tests { let num_file_schema_fields = file_schema.fields().len(); - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)]) - // We should be able to project on the partition column - // Which is supposed to be after the file fields - .with_projection_indices(Some(vec![0, num_file_schema_fields])) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::new( + Arc::clone(&file_schema), + vec![Arc::new(Field::new("date", DataType::Utf8, false))], + ); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + // We should be able to project on the partition column + // Which is supposed to be after the file fields + .with_projection_indices(Some(vec![0, num_file_schema_fields])) + .build(); // we don't have `/date=xx/` in the path but that is ok because // partitions are resolved during scan anyway @@ -463,15 +500,20 @@ mod tests { ) .unwrap(); - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .build(); let csv = DataSourceExec::from_data_source(config); let it = csv.execute(0, task_ctx).unwrap(); diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index f7d5c710bf48..de7e87d25c84 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -176,8 +176,8 @@ mod tests { let (object_store_url, file_groups, file_schema) = prepare_store(&state, file_compression_type.to_owned(), tmp_dir.path()).await; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, 
source) .with_file_groups(file_groups) .with_limit(Some(3)) .with_file_compression_type(file_compression_type.to_owned()) @@ -251,8 +251,8 @@ mod tests { let file_schema = Arc::new(builder.finish()); let missing_field_idx = file_schema.fields.len() - 1; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) .with_limit(Some(3)) .with_file_compression_type(file_compression_type.to_owned()) @@ -294,8 +294,8 @@ mod tests { let (object_store_url, file_groups, file_schema) = prepare_store(&state, file_compression_type.to_owned(), tmp_dir.path()).await; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) .with_projection_indices(Some(vec![0, 2])) .with_file_compression_type(file_compression_type.to_owned()) @@ -342,8 +342,8 @@ mod tests { let (object_store_url, file_groups, file_schema) = prepare_store(&state, file_compression_type.to_owned(), tmp_dir.path()).await; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) .with_projection_indices(Some(vec![3, 0, 2])) .with_file_compression_type(file_compression_type.to_owned()) diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index 0ffb252a6605..b27dcf56e33c 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -161,7 +161,7 @@ mod tests { .as_ref() .map(|p| logical2physical(p, &table_schema)); - let mut source = ParquetSource::default(); + let mut source = ParquetSource::new(table_schema); if let Some(predicate) = predicate { source = source.with_predicate(predicate); } @@ -186,23 +186,19 @@ mod tests { source = source.with_bloom_filter_on_read(false); } - source.with_schema(TableSchema::new(Arc::clone(&table_schema), vec![])) + Arc::new(source) } fn build_parquet_exec( &self, - file_schema: SchemaRef, file_group: FileGroup, source: Arc, ) -> Arc { - let base_config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - file_schema, - source, - ) - .with_file_group(file_group) - .with_projection_indices(self.projection.clone()) - .build(); + let base_config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file_group(file_group) + .with_projection_indices(self.projection.clone()) + .build(); DataSourceExec::from_data_source(base_config) } @@ -231,11 +227,8 @@ mod tests { // build a ParquetExec to return the results let parquet_source = self.build_file_source(Arc::clone(table_schema)); - let parquet_exec = self.build_parquet_exec( - Arc::clone(table_schema), - file_group.clone(), - Arc::clone(&parquet_source), - ); + let parquet_exec = + self.build_parquet_exec(file_group.clone(), Arc::clone(&parquet_source)); let analyze_exec = Arc::new(AnalyzeExec::new( false, @@ -243,7 +236,6 @@ mod tests { vec![MetricType::SUMMARY, MetricType::DEV], // use a new 
ParquetSource to avoid sharing execution metrics self.build_parquet_exec( - Arc::clone(table_schema), file_group.clone(), self.build_file_source(Arc::clone(table_schema)), ), @@ -1550,8 +1542,7 @@ mod tests { ) -> Result<()> { let config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(file_schema)), ) .with_file_groups(file_groups) .build(); @@ -1653,23 +1644,26 @@ mod tests { ), ]); - let source = Arc::new(ParquetSource::default()); - let config = FileScanConfigBuilder::new(object_store_url, schema.clone(), source) - .with_file(partitioned_file) - // file has 10 cols so index 12 should be month and 13 should be day - .with_projection_indices(Some(vec![0, 1, 2, 12, 13])) - .with_table_partition_cols(vec![ - Field::new("year", DataType::Utf8, false), - Field::new("month", DataType::UInt8, false), - Field::new( + let table_schema = TableSchema::new( + Arc::clone(&schema), + vec![ + Arc::new(Field::new("year", DataType::Utf8, false)), + Arc::new(Field::new("month", DataType::UInt8, false)), + Arc::new(Field::new( "day", DataType::Dictionary( Box::new(DataType::UInt16), Box::new(DataType::Utf8), ), false, - ), - ]) + )), + ], + ); + let source = Arc::new(ParquetSource::new(table_schema.clone())); + let config = FileScanConfigBuilder::new(object_store_url, source) + .with_file(partitioned_file) + // file has 10 cols so index 12 should be month and 13 should be day + .with_projection_indices(Some(vec![0, 1, 2, 12, 13])) .build(); let parquet_exec = DataSourceExec::from_data_source(config); @@ -1731,8 +1725,7 @@ mod tests { let file_schema = Arc::new(Schema::empty()); let config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(file_schema)), ) .with_file(partitioned_file) .build(); @@ -2279,11 +2272,11 @@ mod tests { let size_hint_calls = reader_factory.metadata_size_hint_calls.clone(); let source = Arc::new( - ParquetSource::default() + ParquetSource::new(Arc::clone(&schema)) .with_parquet_file_reader_factory(reader_factory) .with_metadata_size_hint(456), ); - let config = FileScanConfigBuilder::new(store_url, schema, source) + let config = FileScanConfigBuilder::new(store_url, source) .with_file( PartitionedFile { object_meta: ObjectMeta { diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 687779787ab5..c732c2c92f64 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -76,6 +76,7 @@ pub use datafusion_execution::config::SessionConfig; use datafusion_execution::registry::SerializerRegistry; pub use datafusion_execution::TaskContext; pub use datafusion_expr::execution_props::ExecutionProps; +use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::{ expr_rewriter::FunctionRewrite, logical_plan::{DdlStatement, Statement}, @@ -83,6 +84,7 @@ use datafusion_expr::{ Expr, UserDefinedLogicalNode, WindowUDF, }; use datafusion_optimizer::analyzer::type_coercion::TypeCoercion; +use datafusion_optimizer::simplify_expressions::ExprSimplifier; use datafusion_optimizer::Analyzer; use datafusion_optimizer::{AnalyzerRule, OptimizerRule}; use datafusion_session::SessionStore; @@ -1269,14 +1271,18 @@ impl SessionContext { exec_datafusion_err!("Prepared statement '{}' does not exist", name) })?; + let state = self.state.read(); + let context = 
SimplifyContext::new(state.execution_props()); + let simplifier = ExprSimplifier::new(context); + // Only allow literals as parameters for now. let mut params: Vec = parameters .into_iter() - .map(|e| match e { + .map(|e| match simplifier.simplify(e)? { Expr::Literal(scalar, metadata) => { Ok(ScalarAndMetadata::new(scalar, metadata)) } - _ => not_impl_err!("Unsupported parameter type: {}", e), + e => not_impl_err!("Unsupported parameter type: {e}"), }) .collect::>()?; diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index c15b7eae0843..d7a66db28ac4 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -547,6 +547,16 @@ impl SessionState { let sql_expr = self.sql_to_expr_with_alias(sql, &dialect)?; + self.create_logical_expr_from_sql_expr(sql_expr, df_schema) + } + + /// Creates a datafusion style AST [`Expr`] from a SQL expression. + #[cfg(feature = "sql")] + pub fn create_logical_expr_from_sql_expr( + &self, + sql_expr: SQLExprWithAlias, + df_schema: &DFSchema, + ) -> datafusion_common::Result { let provider = SessionContextProvider { state: self, tables: HashMap::new(), @@ -2097,6 +2107,36 @@ mod tests { assert!(sql_to_expr(&state).is_err()) } + #[test] + #[cfg(feature = "sql")] + fn test_create_logical_expr_from_sql_expr() { + let state = SessionStateBuilder::new().with_default_features().build(); + + let provider = SessionContextProvider { + state: &state, + tables: HashMap::new(), + }; + + let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); + let df_schema = DFSchema::try_from(schema).unwrap(); + let dialect = state.config.options().sql_parser.dialect; + let query = SqlToRel::new_with_options(&provider, state.get_parser_options()); + + for sql in ["[1,2,3]", "a > 10", "SUM(a)"] { + let sql_expr = state.sql_to_expr(sql, &dialect).unwrap(); + let from_str = query + .sql_to_expr(sql_expr, &df_schema, &mut PlannerContext::new()) + .unwrap(); + + let sql_expr_with_alias = + state.sql_to_expr_with_alias(sql, &dialect).unwrap(); + let from_expr = state + .create_logical_expr_from_sql_expr(sql_expr_with_alias, &df_schema) + .unwrap(); + assert_eq!(from_str, from_expr); + } + } + #[test] fn test_from_existing() -> Result<()> { fn employee_batch() -> RecordBatch { diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index c280b50a9f07..f10755a4594c 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -64,7 +64,9 @@ use datafusion_catalog::ScanArgs; use datafusion_common::display::ToStringifiedPlan; use datafusion_common::format::ExplainAnalyzeLevel; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; -use datafusion_common::TableReference; +use datafusion_common::{ + assert_eq_or_internal_err, assert_or_internal_err, TableReference, +}; use datafusion_common::{ exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, ScalarValue, @@ -347,11 +349,11 @@ impl DefaultPhysicalPlanner { .flatten() .collect::>(); // Ideally this never happens if we have a valid LogicalPlan tree - if outputs.len() != 1 { - return internal_err!( - "Failed to convert LogicalPlan to ExecutionPlan: More than one root detected" - ); - } + assert_eq_or_internal_err!( + outputs.len(), + 1, + "Failed to convert LogicalPlan to ExecutionPlan: More than one root detected" + ); let plan = outputs.pop().unwrap(); Ok(plan) } @@ -588,9 +590,10 @@ 
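// A minimal sketch (not part of the patch) of the error-handling pattern the
// physical_planner.rs hunks above switch to: `assert_eq_or_internal_err!` folds the
// hand-written `if ... { return internal_err!(...) }` guard into one macro call.
// The macro is the one imported from `datafusion_common` above; the wrapper
// function and its `Vec<String>` input here are hypothetical.
use datafusion_common::{assert_eq_or_internal_err, Result};

fn expect_single_root(mut outputs: Vec<String>) -> Result<String> {
    // On a mismatch the macro returns early with an internal error instead of
    // panicking, mirroring the `internal_err!` branch it replaces.
    assert_eq_or_internal_err!(
        outputs.len(),
        1,
        "Failed to convert LogicalPlan to ExecutionPlan: More than one root detected"
    );
    Ok(outputs.pop().unwrap())
}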
impl DefaultPhysicalPlanner { } } LogicalPlan::Window(Window { window_expr, .. }) => { - if window_expr.is_empty() { - return internal_err!("Impossibly got empty window expression"); - } + assert_or_internal_err!( + !window_expr.is_empty(), + "Impossibly got empty window expression" + ); let input_exec = children.one()?; @@ -850,6 +853,7 @@ impl DefaultPhysicalPlanner { )? { PlanAsyncExpr::Sync(PlannedExprResult::Expr(runtime_expr)) => { FilterExec::try_new(Arc::clone(&runtime_expr[0]), physical_input)? + .with_batch_size(session_state.config().batch_size())? } PlanAsyncExpr::Async( async_map, @@ -868,6 +872,7 @@ impl DefaultPhysicalPlanner { .with_projection(Some( (0..input.schema().fields().len()).collect(), ))? + .with_batch_size(session_state.config().batch_size())? } _ => { return internal_err!( @@ -1764,14 +1769,12 @@ fn qualify_join_schema_sides( .zip(left_fields.iter().chain(right_fields.iter())) .enumerate() { - if field.name() != expected.name() { - return internal_err!( - "Field name mismatch at index {}: expected '{}', found '{}'", - i, - expected.name(), - field.name() - ); - } + assert_eq_or_internal_err!( + field.name(), + expected.name(), + "Field name mismatch at index {}", + i + ); } // qualify sides diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 68f83e7f1f11..bbc85af7d874 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -35,12 +35,15 @@ use crate::error::Result; use crate::logical_expr::LogicalPlan; use crate::test_util::{aggr_test_schema, arrow_test_data}; +use datafusion_common::config::CsvOptions; + use arrow::array::{self, Array, ArrayRef, Decimal128Builder, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; #[cfg(feature = "compression")] use datafusion_common::DataFusionError; use datafusion_datasource::source::DataSourceExec; +use datafusion_datasource::TableSchema; #[cfg(feature = "compression")] use bzip2::write::BzEncoder; @@ -92,11 +95,17 @@ pub fn scan_partitioned_csv( FileCompressionType::UNCOMPRESSED, work_dir, )?; - let source = Arc::new(CsvSource::new(true, b'"', b'"')); - let config = - FileScanConfigBuilder::from(partitioned_csv_config(schema, file_groups, source)) - .with_file_compression_type(FileCompressionType::UNCOMPRESSED) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(schema); + let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(); Ok(DataSourceExec::from_data_source(config)) } diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index 203d9e97d2a8..b5213cee3f2d 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -37,10 +37,8 @@ use crate::physical_plan::metrics::MetricsSet; use crate::physical_plan::ExecutionPlan; use crate::prelude::{Expr, SessionConfig, SessionContext}; -use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; -use datafusion_datasource::TableSchema; use object_store::path::Path; use object_store::ObjectMeta; use parquet::arrow::ArrowWriter; @@ -157,20 +155,21 @@ impl 
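// A minimal sketch (not part of the patch) of the construction pattern these test
// updates migrate to: the file schema and format options now travel with the file
// source (`TableSchema::from_file_schema` plus `CsvOptions`), so
// `FileScanConfigBuilder::new` takes only the object store URL and the source.
// The schema, file path and option values below are made up; imports follow the
// ones visible in this diff where shown and are otherwise assumptions.
use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};
use datafusion::datasource::physical_plan::CsvSource;
use datafusion::datasource::source::DataSourceExec;
use datafusion_common::config::CsvOptions;
use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
use datafusion_datasource::{PartitionedFile, TableSchema};
use datafusion_execution::object_store::ObjectStoreUrl;

fn example_csv_scan() -> Arc<DataSourceExec> {
    let file_schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, true)]));
    let options = CsvOptions {
        has_header: Some(true),
        delimiter: b',',
        quote: b'"',
        ..Default::default()
    };
    // The schema and the CSV options are attached to the source itself.
    let source = Arc::new(
        CsvSource::new(TableSchema::from_file_schema(file_schema)).with_csv_options(options),
    );
    // The builder therefore no longer receives a separate schema argument.
    let config = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source)
        .with_file(PartitionedFile::new("data/example.csv".to_string(), 100))
        .build();
    DataSourceExec::from_data_source(config)
}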
TestParquetFile { maybe_filter: Option, ) -> Result> { let parquet_options = ctx.copied_table_options().parquet; - let source = Arc::new(ParquetSource::new(parquet_options.clone())); - let scan_config_builder = FileScanConfigBuilder::new( - self.object_store_url.clone(), - Arc::clone(&self.schema), - source, - ) - .with_file(PartitionedFile { - object_meta: self.object_meta.clone(), - partition_values: vec![], - range: None, - statistics: None, - extensions: None, - metadata_size_hint: None, - }); + let source = Arc::new( + ParquetSource::new(Arc::clone(&self.schema)) + .with_table_parquet_options(parquet_options.clone()), + ); + let scan_config_builder = + FileScanConfigBuilder::new(self.object_store_url.clone(), source).with_file( + PartitionedFile { + object_meta: self.object_meta.clone(), + partition_values: vec![], + range: None, + statistics: None, + extensions: None, + metadata_size_hint: None, + }, + ); let df_schema = Arc::clone(&self.schema).to_dfschema_ref()?; @@ -184,10 +183,10 @@ impl TestParquetFile { create_physical_expr(&filter, &df_schema, &ExecutionProps::default())?; let source = Arc::new( - ParquetSource::new(parquet_options) + ParquetSource::new(Arc::clone(&self.schema)) + .with_table_parquet_options(parquet_options) .with_predicate(Arc::clone(&physical_filter_expr)), - ) - .with_schema(TableSchema::from_file_schema(Arc::clone(&self.schema))); + ); let config = scan_config_builder.with_source(source).build(); let parquet_exec = DataSourceExec::from_data_source(config); diff --git a/datafusion/core/tests/catalog_listing/mod.rs b/datafusion/core/tests/catalog_listing/mod.rs new file mode 100644 index 000000000000..cb6cac4fb067 --- /dev/null +++ b/datafusion/core/tests/catalog_listing/mod.rs @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod pruned_partition_list; diff --git a/datafusion/core/tests/catalog_listing/pruned_partition_list.rs b/datafusion/core/tests/catalog_listing/pruned_partition_list.rs new file mode 100644 index 000000000000..3cdaa3bb9b34 --- /dev/null +++ b/datafusion/core/tests/catalog_listing/pruned_partition_list.rs @@ -0,0 +1,251 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_schema::DataType; +use futures::{FutureExt, StreamExt as _, TryStreamExt as _}; +use object_store::{memory::InMemory, path::Path, ObjectStore as _}; + +use datafusion::execution::SessionStateBuilder; +use datafusion_catalog_listing::helpers::{ + describe_partition, list_partitions, pruned_partition_list, +}; +use datafusion_common::ScalarValue; +use datafusion_datasource::ListingTableUrl; +use datafusion_expr::{col, lit, Expr}; +use datafusion_session::Session; + +#[tokio::test] +async fn test_pruned_partition_list_empty() { + let (store, state) = make_test_store_and_state(&[ + ("tablepath/mypartition=val1/notparquetfile", 100), + ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), + ("tablepath/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), + ]); + let filter = Expr::eq(col("mypartition"), lit("val1")); + let pruned = pruned_partition_list( + state.as_ref(), + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + &[filter], + ".parquet", + &[(String::from("mypartition"), DataType::Utf8)], + ) + .await + .expect("partition pruning failed") + .collect::>() + .await; + + assert_eq!(pruned.len(), 0); +} + +#[tokio::test] +async fn test_pruned_partition_list() { + let (store, state) = make_test_store_and_state(&[ + ("tablepath/mypartition=val1/file.parquet", 100), + ("tablepath/mypartition=val2/file.parquet", 100), + ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), + ("tablepath/mypartition=val1/other=val3/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), + ]); + let filter = Expr::eq(col("mypartition"), lit("val1")); + let pruned = pruned_partition_list( + state.as_ref(), + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + &[filter], + ".parquet", + &[(String::from("mypartition"), DataType::Utf8)], + ) + .await + .expect("partition pruning failed") + .try_collect::>() + .await + .unwrap(); + + assert_eq!(pruned.len(), 2); + let f1 = &pruned[0]; + assert_eq!( + f1.object_meta.location.as_ref(), + "tablepath/mypartition=val1/file.parquet" + ); + assert_eq!(&f1.partition_values, &[ScalarValue::from("val1")]); + let f2 = &pruned[1]; + assert_eq!( + f2.object_meta.location.as_ref(), + "tablepath/mypartition=val1/other=val3/file.parquet" + ); + assert_eq!(f2.partition_values, &[ScalarValue::from("val1"),]); +} + +#[tokio::test] +async fn test_pruned_partition_list_multi() { + let (store, state) = make_test_store_and_state(&[ + ("tablepath/part1=p1v1/file.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v3/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/file2.parquet", 100), + ]); + let filter1 = Expr::eq(col("part1"), lit("p1v2")); + let filter2 = Expr::eq(col("part2"), lit("p2v1")); + let pruned = pruned_partition_list( + state.as_ref(), + store.as_ref(), + 
&ListingTableUrl::parse("file:///tablepath/").unwrap(), + &[filter1, filter2], + ".parquet", + &[ + (String::from("part1"), DataType::Utf8), + (String::from("part2"), DataType::Utf8), + ], + ) + .await + .expect("partition pruning failed") + .try_collect::>() + .await + .unwrap(); + + assert_eq!(pruned.len(), 2); + let f1 = &pruned[0]; + assert_eq!( + f1.object_meta.location.as_ref(), + "tablepath/part1=p1v2/part2=p2v1/file1.parquet" + ); + assert_eq!( + &f1.partition_values, + &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1"),] + ); + let f2 = &pruned[1]; + assert_eq!( + f2.object_meta.location.as_ref(), + "tablepath/part1=p1v2/part2=p2v1/file2.parquet" + ); + assert_eq!( + &f2.partition_values, + &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1")] + ); +} + +#[tokio::test] +async fn test_list_partition() { + let (store, _) = make_test_store_and_state(&[ + ("tablepath/part1=p1v1/file.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v3/part2=p2v1/file3.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/file4.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/empty.parquet", 0), + ]); + + let partitions = list_partitions( + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + 0, + None, + ) + .await + .expect("listing partitions failed"); + + assert_eq!( + &partitions + .iter() + .map(describe_partition) + .collect::>(), + &vec![ + ("tablepath", 0, vec![]), + ("tablepath/part1=p1v1", 1, vec![]), + ("tablepath/part1=p1v2", 1, vec![]), + ("tablepath/part1=p1v3", 1, vec![]), + ] + ); + + let partitions = list_partitions( + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + 1, + None, + ) + .await + .expect("listing partitions failed"); + + assert_eq!( + &partitions + .iter() + .map(describe_partition) + .collect::>(), + &vec![ + ("tablepath", 0, vec![]), + ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), + ("tablepath/part1=p1v2", 1, vec![]), + ("tablepath/part1=p1v2/part2=p2v1", 2, vec![]), + ("tablepath/part1=p1v2/part2=p2v2", 2, vec![]), + ("tablepath/part1=p1v3", 1, vec![]), + ("tablepath/part1=p1v3/part2=p2v1", 2, vec![]), + ] + ); + + let partitions = list_partitions( + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + 2, + None, + ) + .await + .expect("listing partitions failed"); + + assert_eq!( + &partitions + .iter() + .map(describe_partition) + .collect::>(), + &vec![ + ("tablepath", 0, vec![]), + ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), + ("tablepath/part1=p1v2", 1, vec![]), + ("tablepath/part1=p1v3", 1, vec![]), + ( + "tablepath/part1=p1v2/part2=p2v1", + 2, + vec!["file1.parquet", "file2.parquet"] + ), + ("tablepath/part1=p1v2/part2=p2v2", 2, vec!["file4.parquet"]), + ("tablepath/part1=p1v3/part2=p2v1", 2, vec!["file3.parquet"]), + ] + ); +} + +pub fn make_test_store_and_state( + files: &[(&str, u64)], +) -> (Arc, Arc) { + let memory = InMemory::new(); + + for (name, size) in files { + memory + .put(&Path::from(*name), vec![0; *size as usize].into()) + .now_or_never() + .unwrap() + .unwrap(); + } + + let state = SessionStateBuilder::new().build(); + (Arc::new(memory), Arc::new(state)) +} diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs index edcf039e4e70..cc4dfcf72059 100644 --- a/datafusion/core/tests/core_integration.rs +++ b/datafusion/core/tests/core_integration.rs @@ -57,6 +57,9 @@ mod serde; /// Run all tests that are found in 
the `catalog` directory mod catalog; +/// Run all tests that are found in the `catalog_listing` directory +mod catalog_listing; + /// Run all tests that are found in the `tracing` directory mod tracing; diff --git a/datafusion/core/tests/data/partitioned_table_arrow_stream/part=123/data.arrow b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=123/data.arrow new file mode 100644 index 000000000000..bad9e3de4a57 Binary files /dev/null and b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=123/data.arrow differ diff --git a/datafusion/core/tests/data/partitioned_table_arrow_stream/part=456/data.arrow b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=456/data.arrow new file mode 100644 index 000000000000..4a07fbfa47f3 Binary files /dev/null and b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=456/data.arrow differ diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 05f5a204c096..4a49dfb2ed42 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -1627,7 +1627,9 @@ async fn register_table() -> Result<()> { let df_impl = DataFrame::new(ctx.state(), df.logical_plan().clone()); // register a dataframe as a table - ctx.register_table("test_table", df_impl.clone().into_view())?; + let table_provider = df_impl.clone().into_view(); + assert_eq!(table_provider.table_type(), TableType::View); + ctx.register_table("test_table", table_provider)?; // pull the table out let table = ctx.table("test_table").await?; @@ -2864,10 +2866,9 @@ async fn test_count_wildcard_on_sort() -> Result<()> { | | ProjectionExec: expr=[b@0 as b, count(Int64(1))@1 as count(*), count(Int64(1))@1 as count(Int64(1))] | | | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(Int64(1))] | | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(Int64(1))] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(Int64(1))] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | | | | +---------------+------------------------------------------------------------------------------------------------------------+ "### @@ -2876,22 +2877,21 @@ async fn test_count_wildcard_on_sort() -> Result<()> { assert_snapshot!( pretty_format_batches(&df_results).unwrap(), @r###" - +---------------+--------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+--------------------------------------------------------------------------------+ - | logical_plan | Sort: count(*) ASC NULLS LAST | - | | Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1)) AS count(*)]] | - | | TableScan: t1 projection=[b] | - | physical_plan | SortPreservingMergeExec: [count(*)@1 ASC NULLS LAST] | - | | SortExec: expr=[count(*)@1 ASC NULLS LAST], preserve_partitioning=[true] | - | | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(*)] | - | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[b@0 as b], 
aggr=[count(*)] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+--------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------+ + | logical_plan | Sort: count(*) ASC NULLS LAST | + | | Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1)) AS count(*)]] | + | | TableScan: t1 projection=[b] | + | physical_plan | SortPreservingMergeExec: [count(*)@1 ASC NULLS LAST] | + | | SortExec: expr=[count(*)@1 ASC NULLS LAST], preserve_partitioning=[true] | + | | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(*)] | + | | CoalesceBatchesExec: target_batch_size=8192 | + | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(*)] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------+ "### ); Ok(()) @@ -3351,10 +3351,9 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> { | | ProjectionExec: expr=[count(Int64(1))@1 as count(*), a@0 as a, true as __always_true] | | | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(Int64(1))] | | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(Int64(1))] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(Int64(1))] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | | | | +---------------+---------------------------------------------------------------------------------------------------------------------------+ " @@ -3408,10 +3407,9 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> { | | ProjectionExec: expr=[count(*)@1 as count(*), a@0 as a, true as __always_true] | | | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(*)] | | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(*)] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(*)] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | | | | +---------------+---------------------------------------------------------------------------------------------------------------------------+ " diff --git a/datafusion/core/tests/datasource/object_store_access.rs b/datafusion/core/tests/datasource/object_store_access.rs index f89ca9e04914..33129150db58 100644 --- a/datafusion/core/tests/datasource/object_store_access.rs +++ b/datafusion/core/tests/datasource/object_store_access.rs @@ -145,17 +145,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 13 
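// The request summaries rewritten in this hunk reflect a change of listing
// strategy: previously every partition directory level was enumerated with its own
// delimited LIST call (ten for the a=/b=/c= layout above), while the new snapshots
// expect a single recursive `LIST prefix=data` plus the same GET requests.
// A rough sketch of the two object_store calls this appears to correspond to
// (the store and prefix here are hypothetical):
use futures::TryStreamExt;
use object_store::{path::Path, ObjectStore};

async fn listing_styles(store: &dyn ObjectStore) -> object_store::Result<()> {
    let prefix = Path::from("data");
    // One request that walks every partition directory underneath `data/`.
    let all_files = store.list(Some(&prefix)).try_collect::<Vec<_>>().await?;
    // One request per directory level, returning only direct children.
    let one_level = store.list_with_delimiter(Some(&prefix)).await?;
    println!(
        "{} files listed recursively, {} child prefixes at the first level",
        all_files.len(),
        one_level.common_prefixes.len()
    );
    Ok(())
}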
- - LIST (with delimiter) prefix=data - - LIST (with delimiter) prefix=data/a=1 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=3 - - LIST (with delimiter) prefix=data/a=1/b=10 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=3/b=30 - - LIST (with delimiter) prefix=data/a=1/b=10/c=100 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 - - LIST (with delimiter) prefix=data/a=3/b=30/c=300 + Total Requests: 4 + - LIST prefix=data - GET (opts) path=data/a=1/b=10/c=100/file_1.csv - GET (opts) path=data/a=2/b=20/c=200/file_2.csv - GET (opts) path=data/a=3/b=30/c=300/file_3.csv @@ -174,10 +165,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 4 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 + Total Requests: 2 + - LIST prefix=data - GET (opts) path=data/a=2/b=20/c=200/file_2.csv " ); @@ -194,17 +183,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 11 - - LIST (with delimiter) prefix=data - - LIST (with delimiter) prefix=data/a=1 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=3 - - LIST (with delimiter) prefix=data/a=1/b=10 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=3/b=30 - - LIST (with delimiter) prefix=data/a=1/b=10/c=100 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 - - LIST (with delimiter) prefix=data/a=3/b=30/c=300 + Total Requests: 2 + - LIST prefix=data - GET (opts) path=data/a=2/b=20/c=200/file_2.csv " ); @@ -221,17 +201,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 11 - - LIST (with delimiter) prefix=data - - LIST (with delimiter) prefix=data/a=1 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=3 - - LIST (with delimiter) prefix=data/a=1/b=10 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=3/b=30 - - LIST (with delimiter) prefix=data/a=1/b=10/c=100 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 - - LIST (with delimiter) prefix=data/a=3/b=30/c=300 + Total Requests: 2 + - LIST prefix=data - GET (opts) path=data/a=2/b=20/c=200/file_2.csv " ); @@ -248,9 +219,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 3 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=2/b=20/c=200 + Total Requests: 2 + - LIST prefix=data - GET (opts) path=data/a=2/b=20/c=200/file_2.csv " ); @@ -267,17 +237,8 @@ async fn query_partitioned_csv_file() { +---------+-------+-------+---+----+-----+ ------- Object Store Request Summary ------- RequestCountingObjectStore() - Total Requests: 11 - - LIST (with delimiter) prefix=data - - LIST (with delimiter) prefix=data/a=1 - - LIST (with delimiter) prefix=data/a=2 - - LIST (with delimiter) prefix=data/a=3 - - LIST (with delimiter) prefix=data/a=1/b=10 - - LIST (with delimiter) prefix=data/a=2/b=20 - - LIST (with delimiter) prefix=data/a=3/b=30 - - LIST (with delimiter) prefix=data/a=1/b=10/c=100 - 
- LIST (with delimiter) prefix=data/a=2/b=20/c=200 - - LIST (with delimiter) prefix=data/a=3/b=30/c=300 + Total Requests: 2 + - LIST prefix=data - GET (opts) path=data/a=1/b=10/c=100/file_1.csv " ); diff --git a/datafusion/core/tests/execution/mod.rs b/datafusion/core/tests/execution/mod.rs index 8770b2a20105..f33ef87aa302 100644 --- a/datafusion/core/tests/execution/mod.rs +++ b/datafusion/core/tests/execution/mod.rs @@ -18,3 +18,4 @@ mod coop; mod datasource_split; mod logical_plan; +mod register_arrow; diff --git a/datafusion/core/tests/execution/register_arrow.rs b/datafusion/core/tests/execution/register_arrow.rs new file mode 100644 index 000000000000..4ce16dc0906c --- /dev/null +++ b/datafusion/core/tests/execution/register_arrow.rs @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Integration tests for register_arrow API + +use datafusion::{execution::options::ArrowReadOptions, prelude::*}; +use datafusion_common::Result; + +#[tokio::test] +async fn test_register_arrow_auto_detects_format() -> Result<()> { + let ctx = SessionContext::new(); + + ctx.register_arrow( + "file_format", + "../../datafusion/datasource-arrow/tests/data/example.arrow", + ArrowReadOptions::default(), + ) + .await?; + + ctx.register_arrow( + "stream_format", + "../../datafusion/datasource-arrow/tests/data/example_stream.arrow", + ArrowReadOptions::default(), + ) + .await?; + + let file_result = ctx.sql("SELECT * FROM file_format ORDER BY f0").await?; + let stream_result = ctx.sql("SELECT * FROM stream_format ORDER BY f0").await?; + + let file_batches = file_result.collect().await?; + let stream_batches = stream_result.collect().await?; + + assert_eq!(file_batches.len(), stream_batches.len()); + assert_eq!(file_batches[0].schema(), stream_batches[0].schema()); + + let file_rows: usize = file_batches.iter().map(|b| b.num_rows()).sum(); + let stream_rows: usize = stream_batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(file_rows, stream_rows); + + Ok(()) +} + +#[tokio::test] +async fn test_register_arrow_join_file_and_stream() -> Result<()> { + let ctx = SessionContext::new(); + + ctx.register_arrow( + "file_table", + "../../datafusion/datasource-arrow/tests/data/example.arrow", + ArrowReadOptions::default(), + ) + .await?; + + ctx.register_arrow( + "stream_table", + "../../datafusion/datasource-arrow/tests/data/example_stream.arrow", + ArrowReadOptions::default(), + ) + .await?; + + let result = ctx + .sql( + "SELECT a.f0, a.f1, b.f0, b.f1 + FROM file_table a + JOIN stream_table b ON a.f0 = b.f0 + WHERE a.f0 <= 2 + ORDER BY a.f0", + ) + .await?; + let batches = result.collect().await?; + + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 2); + + Ok(()) +} diff --git 
a/datafusion/core/tests/fuzz_cases/pruning.rs b/datafusion/core/tests/fuzz_cases/pruning.rs index f8bd4dbc1a76..51ec8f03e5d2 100644 --- a/datafusion/core/tests/fuzz_cases/pruning.rs +++ b/datafusion/core/tests/fuzz_cases/pruning.rs @@ -276,13 +276,12 @@ async fn execute_with_predicate( ctx: &SessionContext, ) -> Vec { let parquet_source = if prune_stats { - ParquetSource::default().with_predicate(predicate.clone()) + ParquetSource::new(schema.clone()).with_predicate(predicate.clone()) } else { - ParquetSource::default() + ParquetSource::new(schema.clone()) }; let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("memory://").unwrap(), - schema.clone(), Arc::new(parquet_source), ) .with_file_group( diff --git a/datafusion/core/tests/parquet/custom_reader.rs b/datafusion/core/tests/parquet/custom_reader.rs index 3a1f06656236..0a147d15a6fd 100644 --- a/datafusion/core/tests/parquet/custom_reader.rs +++ b/datafusion/core/tests/parquet/custom_reader.rs @@ -80,7 +80,7 @@ async fn route_data_access_ops_to_parquet_file_reader_factory() { .collect(); let source = Arc::new( - ParquetSource::default() + ParquetSource::new(file_schema.clone()) // prepare the scan .with_parquet_file_reader_factory(Arc::new( InMemoryParquetFileReaderFactory(Arc::clone(&in_memory_object_store)), @@ -89,7 +89,6 @@ async fn route_data_access_ops_to_parquet_file_reader_factory() { let base_config = FileScanConfigBuilder::new( // just any url that doesn't point to in memory object store ObjectStoreUrl::local_filesystem(), - file_schema, source, ) .with_file_group(file_group) diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index 5135f956852c..b35cb6e09cfb 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -355,11 +355,11 @@ impl TestFull { let source = if let Some(predicate) = predicate { let df_schema = DFSchema::try_from(schema.clone())?; let predicate = ctx.create_physical_expr(predicate, &df_schema)?; - Arc::new(ParquetSource::default().with_predicate(predicate)) + Arc::new(ParquetSource::new(schema.clone()).with_predicate(predicate)) } else { - Arc::new(ParquetSource::default()) + Arc::new(ParquetSource::new(schema.clone())) }; - let config = FileScanConfigBuilder::new(object_store_url, schema.clone(), source) + let config = FileScanConfigBuilder::new(object_store_url, source) .with_file(partitioned_file) .build(); diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index 27bee10234b5..fb2a196b0aa6 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -81,12 +81,12 @@ async fn get_parquet_exec( let predicate = create_physical_expr(&filter, &df_schema, &execution_props).unwrap(); let source = Arc::new( - ParquetSource::default() + ParquetSource::new(schema.clone()) .with_predicate(predicate) .with_enable_page_index(true) .with_pushdown_filters(pushdown_filters), ); - let base_config = FileScanConfigBuilder::new(object_store_url, schema, source) + let base_config = FileScanConfigBuilder::new(object_store_url, source) .with_file(partitioned_file) .build(); diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 40fc6176e212..0e76d626aac5 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -482,7 +482,7 @@ fn 
test_apply_schema_adapter_with_factory() { ])); // Create a parquet source - let source = ParquetSource::default(); + let source = ParquetSource::new(schema.clone()); // Create a file scan config with source that has a schema adapter factory let factory = Arc::new(PrefixAdapterFactory { @@ -491,12 +491,9 @@ fn test_apply_schema_adapter_with_factory() { let file_source = source.clone().with_schema_adapter_factory(factory).unwrap(); - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - schema.clone(), - file_source, - ) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), file_source) + .build(); // Apply schema adapter to a new source let result_source = source.apply_schema_adapter(&config).unwrap(); @@ -532,18 +529,15 @@ fn test_apply_schema_adapter_without_factory() { ])); // Create a parquet source - let source = ParquetSource::default(); + let source = ParquetSource::new(schema.clone()); // Convert to Arc let file_source: Arc = Arc::new(source.clone()); // Create a file scan config without a schema adapter factory - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - schema.clone(), - file_source, - ) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), file_source) + .build(); // Apply schema adapter function - should pass through the source unchanged let result_source = source.apply_schema_adapter(&config).unwrap(); diff --git a/datafusion/core/tests/parquet/schema_coercion.rs b/datafusion/core/tests/parquet/schema_coercion.rs index 9be391a9108e..51e5242cbafd 100644 --- a/datafusion/core/tests/parquet/schema_coercion.rs +++ b/datafusion/core/tests/parquet/schema_coercion.rs @@ -62,14 +62,10 @@ async fn multi_parquet_coercion() { Field::new("c2", DataType::Int32, true), Field::new("c3", DataType::Float64, true), ])); - let source = Arc::new(ParquetSource::default()); - let conf = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - file_schema, - source, - ) - .with_file_group(file_group) - .build(); + let source = Arc::new(ParquetSource::new(file_schema.clone())); + let conf = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file_group(file_group) + .build(); let parquet_exec = DataSourceExec::from_data_source(conf); @@ -122,8 +118,7 @@ async fn multi_parquet_coercion_projection() { ])); let config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(file_schema)), ) .with_file_group(file_group) .with_projection_indices(Some(vec![1, 0, 2])) diff --git a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs index 5b7d9ac8fbe9..aa5a2d053926 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs @@ -37,6 +37,7 @@ use datafusion::datasource::physical_plan::{CsvSource, ParquetSource}; use datafusion::datasource::source::DataSourceExec; use datafusion::datasource::MemTable; use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_common::config::CsvOptions; use datafusion_common::error::Result; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::ScalarValue; @@ -229,8 +230,7 @@ fn parquet_exec_multiple_sorted( ) -> Arc { let config = FileScanConfigBuilder::new( 
ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(schema())), ) .with_file_groups(vec![ FileGroup::new(vec![PartitionedFile::new("x".to_string(), 100)]), @@ -247,14 +247,19 @@ fn csv_exec() -> Arc { } fn csv_exec_with_sort(output_ordering: Vec) -> Arc { - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(CsvSource::new(false, b',', b'"')), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_output_ordering(output_ordering) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + Arc::new(CsvSource::new(schema()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_output_ordering(output_ordering) + .build(); DataSourceExec::from_data_source(config) } @@ -265,17 +270,22 @@ fn csv_exec_multiple() -> Arc { // Created a sorted parquet exec with multiple files fn csv_exec_multiple_sorted(output_ordering: Vec) -> Arc { - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(CsvSource::new(false, b',', b'"')), - ) - .with_file_groups(vec![ - FileGroup::new(vec![PartitionedFile::new("x".to_string(), 100)]), - FileGroup::new(vec![PartitionedFile::new("y".to_string(), 100)]), - ]) - .with_output_ordering(output_ordering) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + Arc::new(CsvSource::new(schema()).with_csv_options(options)) + }) + .with_file_groups(vec![ + FileGroup::new(vec![PartitionedFile::new("x".to_string(), 100)]), + FileGroup::new(vec![PartitionedFile::new("y".to_string(), 100)]), + ]) + .with_output_ordering(output_ordering) + .build(); DataSourceExec::from_data_source(config) } @@ -618,16 +628,13 @@ fn multi_hash_joins() -> Result<()> { assert_plan!(plan_distrib, @r" HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, c@2)] HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], 
file_type=parquet "); }, // Should include 4 RepartitionExecs @@ -636,16 +643,13 @@ fn multi_hash_joins() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, c@2)] RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); }, }; @@ -690,16 +694,13 @@ fn multi_hash_joins() -> Result<()> { assert_plan!(plan_distrib, @r" HashJoinExec: mode=Partitioned, join_type=..., on=[(b1@1, c@2)] HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } @@ -710,16 +711,13 @@ fn multi_hash_joins() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=..., on=[(b1@6, c@2)] RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10 HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); }, @@ -780,15 +778,12 @@ fn multi_joins_after_alias() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a1@0, c@2)] ProjectionExec: expr=[a@0 as a1, a@0 as a2] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet " ); let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); @@ -811,15 +806,12 @@ fn multi_joins_after_alias() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a2@1, c@2)] ProjectionExec: expr=[a@0 as a1, a@0 as a2] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), 
input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet " ); let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); @@ -869,15 +861,12 @@ fn multi_joins_after_multi_alias() -> Result<()> { ProjectionExec: expr=[c1@0 as a] ProjectionExec: expr=[c@2 as c1] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet " ); let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); @@ -1098,21 +1087,17 @@ fn multi_hash_join_key_ordering() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=Inner, on=[(B@2, b1@6), (C@3, c@2), (AA@1, a1@5)] ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)] - RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)] - RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=1 DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=1 ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as 
c1] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet " ); let plan_sort = test_config.to_plan(filter_top_join, &SORT_DISTRIB_DISTRIB); @@ -1239,21 +1224,17 @@ fn reorder_join_keys_to_left_input() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=..., on=[(AA@1, a1@5), (B@2, b1@6), (C@3, c@2)] ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1), (c@2, c1@2)] - RepartitionExec: partitioning=Hash([a@0, b@1, c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([a1@0, b1@1, c1@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] - RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0, b@1, c@2], 10), input_partitions=1 DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a1@0, b1@1, c1@2], 10), input_partitions=1 ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet ");} } @@ -1371,21 +1352,17 @@ fn reorder_join_keys_to_right_input() -> Result<()> { HashJoinExec: mode=Partitioned, join_type=..., on=[(C@3, c@2), (B@2, b1@6), (AA@1, a1@5)] ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1)] - RepartitionExec: partitioning=Hash([a@0, b@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([a1@0, b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - 
ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] - RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a@0, b@1], 10), input_partitions=1 DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + RepartitionExec: partitioning=Hash([a1@0, b1@1], 10), input_partitions=1 ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet ");} } @@ -1450,18 +1427,15 @@ fn multi_smj_joins() -> Result<()> { SortMergeJoin: join_type=..., on=[(a@0, c@2)] SortMergeJoin: join_type=..., on=[(a@0, b1@1)] SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet SortExec: expr=[b1@1 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet SortExec: expr=[c@2 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } // Should include 7 RepartitionExecs (4 hash, 3 round-robin), 4 SortExecs @@ -1480,18 +1454,15 @@ SortMergeJoin: join_type=..., on=[(a@0, c@2)] RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 SortMergeJoin: join_type=..., on=[(a@0, b1@1)] SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), 
input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet SortExec: expr=[b1@1 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet SortExec: expr=[c@2 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } } @@ -1505,19 +1476,16 @@ SortMergeJoin: join_type=..., on=[(a@0, c@2)] assert_plan!(plan_sort, @r" SortMergeJoin: join_type=..., on=[(a@0, c@2)] SortMergeJoin: join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } // Should include 8 RepartitionExecs (4 hash, 8 round-robin), 4 SortExecs @@ -1533,24 +1501,20 @@ SortMergeJoin: join_type=..., on=[(a@0, c@2)] // TODO(wiedld): show different test result if enforce distribution first. 
assert_plan!(plan_sort, @r" SortMergeJoin: join_type=..., on=[(a@0, c@2)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - SortMergeJoin: join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + SortMergeJoin: join_type=..., on=[(a@0, b1@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } } @@ -1575,18 +1539,15 @@ SortMergeJoin: join_type=..., on=[(a@0, c@2)] SortMergeJoin: join_type=..., on=[(b1@6, c@2)] SortMergeJoin: join_type=..., on=[(a@0, b1@1)] SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet SortExec: expr=[b1@1 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as 
e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet SortExec: expr=[c@2 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } // Should include 7 RepartitionExecs (4 hash, 3 round-robin) and 4 SortExecs @@ -1598,18 +1559,15 @@ SortMergeJoin: join_type=..., on=[(b1@6, c@2)] RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10 SortMergeJoin: join_type=..., on=[(a@0, b1@1)] SortExec: expr=[a@0 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet SortExec: expr=[b1@1 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet SortExec: expr=[c@2 ASC], preserve_partitioning=[true] - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } // this match arm cannot be reached @@ -1625,19 +1583,16 @@ SortMergeJoin: join_type=..., on=[(b1@6, c@2)] assert_plan!(plan_sort, @r" SortMergeJoin: join_type=..., on=[(b1@6, c@2)] SortMergeJoin: join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[c@2 ASC], 
preserve_partitioning=[false] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } // Should include 8 RepartitionExecs (4 of them preserves order) and 4 SortExecs @@ -1645,24 +1600,20 @@ SortMergeJoin: join_type=..., on=[(b1@6, c@2)] // TODO(wiedld): show different test result if enforce distribution first. assert_plan!(plan_sort, @r" SortMergeJoin: join_type=..., on=[(b1@6, c@2)] - RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@6 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b1@6 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - SortMergeJoin: join_type=..., on=[(a@0, b1@1)] - RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[a@0 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] - ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[c@2 ASC], preserve_partitioning=[false] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=1 + SortExec: expr=[b1@6 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + SortMergeJoin: join_type=..., on=[(a@0, b1@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); } // this match arm cannot be reached @@ -1753,27 +1704,25 @@ SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)] let 
plan_sort = test_config.to_plan(join, &SORT_DISTRIB_DISTRIB); assert_plan!(plan_sort, @r" SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)] - RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b3@1 ASC, a3@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - ProjectionExec: expr=[a1@0 as a3, b1@1 as b3] - ProjectionExec: expr=[a1@1 as a1, b1@0 as b1] - AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[] - RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10 - AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[] - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet - RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b2@1 ASC, a2@0 ASC - RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[false] - CoalescePartitionsExec - ProjectionExec: expr=[a@1 as a2, b@0 as b2] - AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[] - RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10 - AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=1 + SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a1@0 as a3, b1@1 as b3] + ProjectionExec: expr=[a1@1 as a1, b1@0 as b1] + AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[] + RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[] RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=1 + SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a@1 as a2, b@0 as b2] + AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[] + RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet "); Ok(()) @@ -2597,11 +2546,15 @@ fn parallelization_compressed_csv() -> Result<()> { for compression_type in compression_types { let plan = aggregate_exec_with_alias( DataSourceExec::from_data_source( - FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(CsvSource::new(false, b',', b'"')), - ) + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + Arc::new(CsvSource::new(schema()).with_csv_options(options)) + }) .with_file(PartitionedFile::new("x".to_string(), 100)) .with_file_compression_type(compression_type) .build(), diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs 
b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index e3a0eb7e1aa6..c0cfa46733f1 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -31,7 +31,7 @@ use crate::physical_optimizer::test_utils::{ use arrow::compute::SortOptions; use arrow::datatypes::{DataType, SchemaRef}; -use datafusion_common::config::ConfigOptions; +use datafusion_common::config::{ConfigOptions, CsvOptions}; use datafusion_common::tree_node::{TreeNode, TransformedResult}; use datafusion_common::{Result, TableReference}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; @@ -72,10 +72,15 @@ fn csv_exec_sorted( schema: &SchemaRef, sort_exprs: impl IntoIterator, ) -> Arc { + let options = CsvOptions { + has_header: Some(false), + delimiter: 0, + quote: 0, + ..Default::default() + }; let mut builder = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), - Arc::new(CsvSource::new(false, 0, 0)), + Arc::new(CsvSource::new(schema.clone()).with_csv_options(options)), ) .with_file(PartitionedFile::new("x".to_string(), 100)); if let Some(ordering) = LexOrdering::new(sort_exprs) { diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs index de6114950890..31909415a286 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs @@ -859,20 +859,17 @@ async fn test_topk_filter_passes_through_coalesce_partitions() { ]; // Create a source that supports all batches - let source = Arc::new(TestSource::new(true, batches)); - - let base_config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test://").unwrap(), - Arc::clone(&schema()), - source, - ) - .with_file_groups(vec![ - // Partition 0 - FileGroup::new(vec![PartitionedFile::new("test1.parquet", 123)]), - // Partition 1 - FileGroup::new(vec![PartitionedFile::new("test2.parquet", 123)]), - ]) - .build(); + let source = Arc::new(TestSource::new(schema(), true, batches)); + + let base_config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test://").unwrap(), source) + .with_file_groups(vec![ + // Partition 0 + FileGroup::new(vec![PartitionedFile::new("test1.parquet", 123)]), + // Partition 1 + FileGroup::new(vec![PartitionedFile::new("test2.parquet", 123)]), + ]) + .build(); let scan = DataSourceExec::from_data_source(base_config); diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs index 7d8a9c7c2125..2bd70221f41e 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs @@ -24,7 +24,6 @@ use datafusion_datasource::{ file_scan_config::FileScanConfigBuilder, file_stream::FileOpenFuture, file_stream::FileOpener, schema_adapter::DefaultSchemaAdapterFactory, schema_adapter::SchemaAdapterFactory, source::DataSourceExec, PartitionedFile, - TableSchema, }; use datafusion_physical_expr_common::physical_expr::fmt_sql; use datafusion_physical_optimizer::PhysicalOptimizerRule; @@ -53,7 +52,7 @@ use std::{ pub struct TestOpener { batches: Vec, batch_size: Option, - schema: Option, + schema: SchemaRef, projection: Option>, predicate: Option>, } @@ -71,23 +70,23 @@ impl FileOpener for TestOpener { } batches = new_batches.into_iter().collect(); } - if let Some(schema) = &self.schema { - 
let factory = DefaultSchemaAdapterFactory::from_schema(Arc::clone(schema)); - let (mapper, projection) = factory.map_schema(&batches[0].schema()).unwrap(); - let mut new_batches = Vec::new(); - for batch in batches { - let batch = if let Some(predicate) = &self.predicate { - batch_filter(&batch, predicate)? - } else { - batch - }; - let batch = batch.project(&projection).unwrap(); - let batch = mapper.map_batch(batch).unwrap(); - new_batches.push(batch); - } - batches = new_batches; + let factory = DefaultSchemaAdapterFactory::from_schema(Arc::clone(&self.schema)); + let (mapper, projection) = factory.map_schema(&batches[0].schema()).unwrap(); + let mut new_batches = Vec::new(); + for batch in batches { + let batch = if let Some(predicate) = &self.predicate { + batch_filter(&batch, predicate)? + } else { + batch + }; + + let batch = batch.project(&projection).unwrap(); + let batch = mapper.map_batch(batch).unwrap(); + new_batches.push(batch); } + batches = new_batches; + if let Some(projection) = &self.projection { batches = batches .into_iter() @@ -102,26 +101,35 @@ impl FileOpener for TestOpener { } /// A placeholder data source that accepts filter pushdown -#[derive(Clone, Default)] +#[derive(Clone)] pub struct TestSource { support: bool, predicate: Option>, statistics: Option, batch_size: Option, batches: Vec, - schema: Option, + schema: SchemaRef, metrics: ExecutionPlanMetricsSet, projection: Option>, schema_adapter_factory: Option>, + table_schema: datafusion_datasource::TableSchema, } impl TestSource { - pub fn new(support: bool, batches: Vec) -> Self { + pub fn new(schema: SchemaRef, support: bool, batches: Vec) -> Self { + let table_schema = + datafusion_datasource::TableSchema::new(Arc::clone(&schema), vec![]); Self { + schema, support, metrics: ExecutionPlanMetricsSet::new(), batches, - ..Default::default() + predicate: None, + statistics: None, + batch_size: None, + projection: None, + schema_adapter_factory: None, + table_schema, } } } @@ -136,7 +144,7 @@ impl FileSource for TestSource { Arc::new(TestOpener { batches: self.batches.clone(), batch_size: self.batch_size, - schema: self.schema.clone(), + schema: Arc::clone(&self.schema), projection: self.projection.clone(), predicate: self.predicate.clone(), }) @@ -157,17 +165,6 @@ impl FileSource for TestSource { }) } - fn with_schema(&self, schema: TableSchema) -> Arc { - assert!( - schema.table_partition_cols().is_empty(), - "TestSource does not support partition columns" - ); - Arc::new(TestSource { - schema: Some(schema.file_schema().clone()), - ..self.clone() - }) - } - fn with_projection(&self, config: &FileScanConfig) -> Arc { Arc::new(TestSource { projection: config.projection_exprs.as_ref().map(|p| p.column_indices()), @@ -260,6 +257,10 @@ impl FileSource for TestSource { fn schema_adapter_factory(&self) -> Option> { self.schema_adapter_factory.clone() } + + fn table_schema(&self) -> &datafusion_datasource::TableSchema { + &self.table_schema + } } #[derive(Debug, Clone)] @@ -289,14 +290,15 @@ impl TestScanBuilder { } pub fn build(self) -> Arc { - let source = Arc::new(TestSource::new(self.support, self.batches)); - let base_config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test://").unwrap(), + let source = Arc::new(TestSource::new( Arc::clone(&self.schema), - source, - ) - .with_file(PartitionedFile::new("test.parquet", 123)) - .build(); + self.support, + self.batches, + )); + let base_config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test://").unwrap(), source) + 
.with_file(PartitionedFile::new("test.parquet", 123)) + .build(); DataSourceExec::from_data_source(base_config) } } diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs index 8631613c3925..9d39a80fb9df 100644 --- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs @@ -24,9 +24,10 @@ use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::physical_plan::CsvSource; use datafusion::datasource::source::DataSourceExec; -use datafusion_common::config::ConfigOptions; +use datafusion_common::config::{ConfigOptions, CsvOptions}; use datafusion_common::{JoinSide, JoinType, NullEquality, Result, ScalarValue}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; +use datafusion_datasource::TableSchema; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::{ @@ -384,14 +385,19 @@ fn create_simple_csv_exec() -> Arc { Field::new("d", DataType::Int32, true), Field::new("e", DataType::Int32, true), ])); - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(CsvSource::new(false, 0, 0)), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection_indices(Some(vec![0, 1, 2, 3, 4])) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: 0, + quote: 0, + ..Default::default() + }; + Arc::new(CsvSource::new(schema.clone()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection_indices(Some(vec![0, 1, 2, 3, 4])) + .build(); DataSourceExec::from_data_source(config) } @@ -403,14 +409,19 @@ fn create_projecting_csv_exec() -> Arc { Field::new("c", DataType::Int32, true), Field::new("d", DataType::Int32, true), ])); - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(CsvSource::new(false, 0, 0)), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection_indices(Some(vec![3, 2, 1])) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: 0, + quote: 0, + ..Default::default() + }; + Arc::new(CsvSource::new(schema.clone()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection_indices(Some(vec![3, 2, 1])) + .build(); DataSourceExec::from_data_source(config) } @@ -1589,13 +1600,21 @@ fn partitioned_data_source() -> Arc { Field::new("string_col", DataType::Utf8, true), ])); + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::new( + Arc::clone(&file_schema), + vec![Arc::new(Field::new("partition_col", DataType::Utf8, true))], + ); let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - file_schema.clone(), - Arc::new(CsvSource::default()), + Arc::new(CsvSource::new(table_schema).with_csv_options(options)), ) .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_table_partition_cols(vec![Field::new("partition_col", DataType::Utf8, true)]) 
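For reference, a minimal sketch of the scan construction these test hunks migrate to: CSV-specific settings move into `CsvOptions` applied via `with_csv_options`, and partition columns ride along in `TableSchema` instead of `with_table_partition_cols`. The file schema below is an illustrative placeholder; the builder calls, option fields, and helper types are the ones shown in the hunks above.

use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::physical_plan::CsvSource;
use datafusion::datasource::source::DataSourceExec;
use datafusion_common::config::CsvOptions;
use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
use datafusion_datasource::TableSchema;
use datafusion_execution::object_store::ObjectStoreUrl;

fn example_partitioned_csv_scan() -> Arc<DataSourceExec> {
    // Placeholder file schema for illustration only.
    let file_schema =
        Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));

    // Partition columns are now carried by the source's TableSchema.
    let table_schema = TableSchema::new(
        Arc::clone(&file_schema),
        vec![Arc::new(Field::new("partition_col", DataType::Utf8, true))],
    );

    // CSV settings that used to be positional CsvSource constructor arguments.
    let options = CsvOptions {
        has_header: Some(false),
        delimiter: b',',
        quote: b'"',
        ..Default::default()
    };

    let config = FileScanConfigBuilder::new(
        ObjectStoreUrl::parse("test:///").unwrap(),
        Arc::new(CsvSource::new(table_schema).with_csv_options(options)),
    )
    .with_file(PartitionedFile::new("x".to_string(), 100))
    .build();

    DataSourceExec::from_data_source(config)
}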
.with_projection_indices(Some(vec![0, 1, 2])) .build(); diff --git a/datafusion/core/tests/physical_optimizer/test_utils.rs b/datafusion/core/tests/physical_optimizer/test_utils.rs index 8ca33f3d4abb..60fec2243621 100644 --- a/datafusion/core/tests/physical_optimizer/test_utils.rs +++ b/datafusion/core/tests/physical_optimizer/test_utils.rs @@ -73,8 +73,7 @@ use datafusion_physical_plan::{ pub fn parquet_exec(schema: SchemaRef) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(schema)), ) .with_file(PartitionedFile::new("x".to_string(), 100)) .build(); @@ -89,8 +88,7 @@ pub(crate) fn parquet_exec_with_sort( ) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(schema)), ) .with_file(PartitionedFile::new("x".to_string(), 100)) .with_output_ordering(output_ordering) @@ -127,8 +125,7 @@ pub(crate) fn parquet_exec_with_stats(file_size: u64) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(ParquetSource::new(Default::default())), + Arc::new(ParquetSource::new(schema())), ) .with_file(PartitionedFile::new("x".to_string(), file_size)) .with_statistics(statistics) diff --git a/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs b/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs index c3c92a9028d6..191529816481 100644 --- a/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs +++ b/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs @@ -27,12 +27,14 @@ use datafusion::datasource::physical_plan::{ }; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; +use datafusion_common::config::CsvOptions; use datafusion_common::ColumnStatistics; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::schema_adapter::{ SchemaAdapter, SchemaAdapterFactory, SchemaMapper, }; use datafusion_datasource::source::DataSourceExec; +use datafusion_datasource::TableSchema; use datafusion_execution::object_store::ObjectStoreUrl; use object_store::{memory::InMemory, path::Path, ObjectStore}; use parquet::arrow::ArrowWriter; @@ -182,17 +184,17 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> { let ctx = SessionContext::new(); ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); - // Create a ParquetSource with the adapter factory - let file_source = ParquetSource::default() - .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; - // Create a table schema with uppercase column names let table_schema = Arc::new(Schema::new(vec![ Field::new("ID", DataType::Int32, false), Field::new("NAME", DataType::Utf8, true), ])); - let config = FileScanConfigBuilder::new(store_url, table_schema.clone(), file_source) + // Create a ParquetSource with the adapter factory + let file_source = ParquetSource::new(table_schema.clone()) + .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; + + let config = FileScanConfigBuilder::new(store_url, file_source) .with_file(PartitionedFile::new(path, file_size)) .build(); @@ -245,10 +247,10 @@ async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter( ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); // Create a ParquetSource with the 
adapter factory - let file_source = ParquetSource::default() + let file_source = ParquetSource::new(batch.schema()) .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; - let config = FileScanConfigBuilder::new(store_url, batch.schema(), file_source) + let config = FileScanConfigBuilder::new(store_url, file_source) .with_file(PartitionedFile::new(path, file_size)) .build(); @@ -282,9 +284,12 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // Create a test factory let factory = Arc::new(UppercaseAdapterFactory {}); - // Test ArrowSource + // Test ArrowFileSource { - let source = ArrowSource::default(); + let schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + let table_schema = TableSchema::new(schema, vec![]); + let source = ArrowSource::new_file_source(table_schema); let source_with_adapter = source .clone() .with_schema_adapter_factory(factory.clone()) @@ -304,7 +309,9 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // Test ParquetSource #[cfg(feature = "parquet")] { - let source = ParquetSource::default(); + let schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + let source = ParquetSource::new(schema); let source_with_adapter = source .clone() .with_schema_adapter_factory(factory.clone()) @@ -323,7 +330,15 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // Test CsvSource { - let source = CsvSource::default(); + let schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let source = CsvSource::new(schema).with_csv_options(options); let source_with_adapter = source .clone() .with_schema_adapter_factory(factory.clone()) @@ -342,7 +357,10 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // Test JsonSource { - let source = JsonSource::default(); + let schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + let table_schema = TableSchema::new(schema, vec![]); + let source = JsonSource::new(table_schema); let source_with_adapter = source .clone() .with_schema_adapter_factory(factory.clone()) diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 26b71b5496f2..929de7a5304d 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -61,12 +61,9 @@ async fn explain_analyze_baseline_metrics() { assert_metrics!( &formatted, "AggregateExec: mode=Partial, gby=[]", - "metrics=[output_rows=3, elapsed_compute=" - ); - assert_metrics!( - &formatted, - "AggregateExec: mode=Partial, gby=[]", - "output_bytes=" + "metrics=[output_rows=3, elapsed_compute=", + "output_bytes=", + "output_batches=3" ); assert_metrics!( @@ -75,59 +72,76 @@ async fn explain_analyze_baseline_metrics() { "reduction_factor=5.1% (5/99)" ); - assert_metrics!( - &formatted, - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", - "metrics=[output_rows=5, elapsed_compute=" - ); - assert_metrics!( - &formatted, - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", - "output_bytes=" - ); - assert_metrics!( - &formatted, - "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", - "metrics=[output_rows=99, elapsed_compute=" - ); + { + let expected_batch_count_after_repartition = + if cfg!(not(feature = "force_hash_collisions")) { + "output_batches=3" + } else { + "output_batches=1" + }; + + 
assert_metrics!( + &formatted, + "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", + "metrics=[output_rows=5, elapsed_compute=", + "output_bytes=", + expected_batch_count_after_repartition + ); + + assert_metrics!( + &formatted, + "RepartitionExec: partitioning=Hash([c1@0], 3), input_partitions=3", + "metrics=[output_rows=5, elapsed_compute=", + "output_bytes=", + expected_batch_count_after_repartition + ); + + assert_metrics!( + &formatted, + "ProjectionExec: expr=[]", + "metrics=[output_rows=5, elapsed_compute=", + "output_bytes=", + expected_batch_count_after_repartition + ); + + assert_metrics!( + &formatted, + "CoalesceBatchesExec: target_batch_size=4096", + "metrics=[output_rows=5, elapsed_compute", + "output_bytes=", + expected_batch_count_after_repartition + ); + } + assert_metrics!( &formatted, "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", - "output_bytes=" + "metrics=[output_rows=99, elapsed_compute=", + "output_bytes=", + "output_batches=1" ); + assert_metrics!( &formatted, "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", "selectivity=99% (99/100)" ); - assert_metrics!( - &formatted, - "ProjectionExec: expr=[]", - "metrics=[output_rows=5, elapsed_compute=" - ); - assert_metrics!(&formatted, "ProjectionExec: expr=[]", "output_bytes="); - assert_metrics!( - &formatted, - "CoalesceBatchesExec: target_batch_size=4096", - "metrics=[output_rows=5, elapsed_compute" - ); - assert_metrics!( - &formatted, - "CoalesceBatchesExec: target_batch_size=4096", - "output_bytes=" - ); + assert_metrics!( &formatted, "UnionExec", - "metrics=[output_rows=3, elapsed_compute=" + "metrics=[output_rows=3, elapsed_compute=", + "output_bytes=", + "output_batches=3" ); - assert_metrics!(&formatted, "UnionExec", "output_bytes="); + assert_metrics!( &formatted, "WindowAggExec", - "metrics=[output_rows=1, elapsed_compute=" + "metrics=[output_rows=1, elapsed_compute=", + "output_bytes=", + "output_batches=1" ); - assert_metrics!(&formatted, "WindowAggExec", "output_bytes="); fn expected_to_have_metrics(plan: &dyn ExecutionPlan) -> bool { use datafusion::physical_plan; @@ -228,9 +242,13 @@ async fn explain_analyze_level() { for (level, needle, should_contain) in [ (ExplainAnalyzeLevel::Summary, "spill_count", false), + (ExplainAnalyzeLevel::Summary, "output_batches", false), (ExplainAnalyzeLevel::Summary, "output_rows", true), + (ExplainAnalyzeLevel::Summary, "output_bytes", true), (ExplainAnalyzeLevel::Dev, "spill_count", true), (ExplainAnalyzeLevel::Dev, "output_rows", true), + (ExplainAnalyzeLevel::Dev, "output_bytes", true), + (ExplainAnalyzeLevel::Dev, "output_batches", true), ] { let plan = collect_plan(sql, level).await; assert_eq!( @@ -798,14 +816,12 @@ async fn test_physical_plan_display_indent_multi_children() { CoalesceBatchesExec: target_batch_size=4096 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c1@0, c2@0)], projection=[c1@0] CoalesceBatchesExec: target_batch_size=4096 - RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000 - RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 - DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true + RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=1 + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true CoalesceBatchesExec: target_batch_size=4096 - RepartitionExec: partitioning=Hash([c2@0], 9000), 
input_partitions=9000 - RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 - ProjectionExec: expr=[c1@0 as c2] - DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true + RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=1 + ProjectionExec: expr=[c1@0 as c2] + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true "### ); } diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index 7a5983447592..b64a1e18f3f6 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -73,13 +73,11 @@ async fn join_change_in_planner() -> Result<()> { @r" SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] " ); Ok(()) @@ -134,13 +132,11 @@ async fn join_no_order_on_filter() -> Result<()> { @r" SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a3@0 AS Int64) > CAST(a3@1 AS Int64) + 3 AND CAST(a3@0 AS Int64) < CAST(a3@1 AS Int64) + 10 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] " ); Ok(()) @@ -177,13 +173,11 @@ async fn join_change_in_planner_without_sort() -> Result<()> { @r" SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS 
Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true " ); Ok(()) diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 743c8750b521..426ec213b324 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -40,18 +40,24 @@ use std::io::Write; use std::path::PathBuf; use tempfile::TempDir; -/// A macro to assert that some particular line contains two substrings +/// A macro to assert that some particular line contains the given substrings /// -/// Usage: `assert_metrics!(actual, operator_name, metrics)` +/// Usage: `assert_metrics!(actual, operator_name, metrics_1, metrics_2, ...)` macro_rules! assert_metrics { - ($ACTUAL: expr, $OPERATOR_NAME: expr, $METRICS: expr) => { + ($ACTUAL: expr, $OPERATOR_NAME: expr, $($METRICS: expr),+) => { let found = $ACTUAL .lines() - .any(|line| line.contains($OPERATOR_NAME) && line.contains($METRICS)); + .any(|line| line.contains($OPERATOR_NAME) $( && line.contains($METRICS))+); + + let mut metrics = String::new(); + $(metrics.push_str(format!(" '{}',", $METRICS).as_str());)+ + // remove the last `,` from the string + metrics.pop(); + assert!( found, - "Can not find a line with both '{}' and '{}' in\n\n{}", - $OPERATOR_NAME, $METRICS, $ACTUAL + "Cannot find a line with operator name '{}' and metrics containing values {} in :\n\n{}", + $OPERATOR_NAME, metrics, $ACTUAL ); }; } diff --git a/datafusion/datasource-arrow/src/file_format.rs b/datafusion/datasource-arrow/src/file_format.rs index 3b8564080421..ef478e268890 100644 --- a/datafusion/datasource-arrow/src/file_format.rs +++ b/datafusion/datasource-arrow/src/file_format.rs @@ -20,15 +20,15 @@ //! 
Works with files following the [Arrow IPC format](https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format) use std::any::Any; -use std::borrow::Cow; use std::collections::HashMap; use std::fmt::{self, Debug}; +use std::io::{Seek, SeekFrom}; use std::sync::Arc; use arrow::datatypes::{Schema, SchemaRef}; use arrow::error::ArrowError; use arrow::ipc::convert::fb_to_schema; -use arrow::ipc::reader::FileReader; +use arrow::ipc::reader::{FileReader, StreamReader}; use arrow::ipc::writer::IpcWriteOptions; use arrow::ipc::{root_as_message, CompressionType}; use datafusion_common::error::Result; @@ -45,6 +45,7 @@ use datafusion_datasource::sink::{DataSink, DataSinkExec}; use datafusion_datasource::write::{ get_writer_schema, ObjectWriterBuilder, SharedBuffer, }; +use datafusion_datasource::TableSchema; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::dml::InsertOp; use datafusion_physical_expr_common::sort_expr::LexRequirement; @@ -61,7 +62,9 @@ use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; use datafusion_session::Session; use futures::stream::BoxStream; use futures::StreamExt; -use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; +use object_store::{ + path::Path, GetOptions, GetRange, GetResultPayload, ObjectMeta, ObjectStore, +}; use tokio::io::AsyncWriteExt; /// Initial writing buffer size. Note this is just a size hint for efficiency. It @@ -71,8 +74,8 @@ const INITIAL_BUFFER_BYTES: usize = 1048576; /// If the buffered Arrow data exceeds this size, it is flushed to object store const BUFFER_FLUSH_BYTES: usize = 1024000; +/// Factory struct used to create [`ArrowFormat`] #[derive(Default, Debug)] -/// Factory struct used to create [ArrowFormat] pub struct ArrowFormatFactory; impl ArrowFormatFactory { @@ -107,7 +110,7 @@ impl GetExt for ArrowFormatFactory { } } -/// Arrow `FileFormat` implementation. +/// Arrow [`FileFormat`] implementation. #[derive(Default, Debug)] pub struct ArrowFormat; @@ -150,12 +153,23 @@ impl FileFormat for ArrowFormat { let schema = match r.payload { #[cfg(not(target_arch = "wasm32"))] GetResultPayload::File(mut file, _) => { - let reader = FileReader::try_new(&mut file, None)?; - reader.schema() - } - GetResultPayload::Stream(stream) => { - infer_schema_from_file_stream(stream).await? + match FileReader::try_new(&mut file, None) { + Ok(reader) => reader.schema(), + Err(file_error) => { + // not in the file format, but FileReader read some bytes + // while trying to parse the file and so we need to rewind + // it to the beginning of the file + file.seek(SeekFrom::Start(0))?; + match StreamReader::try_new(&mut file, None) { + Ok(reader) => reader.schema(), + Err(stream_error) => { + return Err(internal_datafusion_err!("Failed to parse Arrow file as either file format or stream format. File format error: {file_error}. Stream format error: {stream_error}")); + } + } + } + } } + GetResultPayload::Stream(stream) => infer_stream_schema(stream).await?, }; schemas.push(schema.as_ref().clone()); } @@ -175,10 +189,33 @@ impl FileFormat for ArrowFormat { async fn create_physical_plan( &self, - _state: &dyn Session, + state: &dyn Session, conf: FileScanConfig, ) -> Result> { - let source = Arc::new(ArrowSource::default()); + let object_store = state.runtime_env().object_store(&conf.object_store_url)?; + let object_location = &conf + .file_groups + .first() + .ok_or_else(|| internal_datafusion_err!("No files found in file group"))? 
+ .files() + .first() + .ok_or_else(|| internal_datafusion_err!("No files found in file group"))? + .object_meta + .location; + + let table_schema = TableSchema::new( + Arc::clone(conf.file_schema()), + conf.table_partition_cols().clone(), + ); + + let source: Arc = + match is_object_in_arrow_ipc_file_format(object_store, object_location).await + { + Ok(true) => Arc::new(ArrowSource::new_file_source(table_schema)), + Ok(false) => Arc::new(ArrowSource::new_stream_file_source(table_schema)), + Err(e) => Err(e)?, + }; + let config = FileScanConfigBuilder::from(conf) .with_source(source) .build(); @@ -202,12 +239,12 @@ impl FileFormat for ArrowFormat { Ok(Arc::new(DataSinkExec::new(input, sink, order_requirements)) as _) } - fn file_source(&self) -> Arc { - Arc::new(ArrowSource::default()) + fn file_source(&self, table_schema: TableSchema) -> Arc { + Arc::new(ArrowSource::new_file_source(table_schema)) } } -/// Implements [`FileSink`] for writing to arrow_ipc files +/// Implements [`FileSink`] for Arrow IPC files struct ArrowFileSink { config: FileSinkConfig, } @@ -344,94 +381,160 @@ impl DataSink for ArrowFileSink { } } +// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. +// See + const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; -/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. -/// See -async fn infer_schema_from_file_stream( +async fn infer_stream_schema( mut stream: BoxStream<'static, object_store::Result>, ) -> Result { - // Expected format: - // - 6 bytes - // - 2 bytes - // - 4 bytes, not present below v0.15.0 - // - 4 bytes - // - // - - // So in first read we need at least all known sized sections, - // which is 6 + 2 + 4 + 4 = 16 bytes. - let bytes = collect_at_least_n_bytes(&mut stream, 16, None).await?; - - // Files should start with these magic bytes - if bytes[0..6] != ARROW_MAGIC { - return Err(ArrowError::ParseError( - "Arrow file does not contain correct header".to_string(), - ))?; - } - - // Since continuation marker bytes added in later versions - let (meta_len, rest_of_bytes_start_index) = if bytes[8..12] == CONTINUATION_MARKER { - (&bytes[12..16], 16) + // IPC streaming format. + // See https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format + // + // + // + // ... + // + // + // ... + // + // ... + // + // ... + // + // + + // The streaming format is made up of a sequence of encapsulated messages. + // See https://arrow.apache.org/docs/format/Columnar.html#encapsulated-message-format + // + // (added in v0.15.0) + // + // + // + // + // + // The first message is the schema. + + // IPC file format is a wrapper around the streaming format with indexing information. + // See https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format + // + // + // + // + //
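The schema inference path above first tries the IPC file-format reader and, when that probe fails, rewinds and retries with the streaming-format reader. A minimal standalone sketch of that fallback for a local `std::fs::File` (simplified error handling; not the exact object-store helper this diff adds):

use std::fs::File;
use std::io::{Seek, SeekFrom};

use arrow::datatypes::SchemaRef;
use arrow::ipc::reader::{FileReader, StreamReader};

fn ipc_schema(mut file: File) -> Result<SchemaRef, Box<dyn std::error::Error>> {
    // Assume the IPC *file* format first (ARROW1 magic + footer).
    match FileReader::try_new(&mut file, None) {
        Ok(reader) => Ok(reader.schema()),
        Err(_file_err) => {
            // FileReader may have consumed bytes while probing, so rewind
            // before retrying with the *streaming* format reader.
            file.seek(SeekFrom::Start(0))?;
            Ok(StreamReader::try_new(&mut file, None)?.schema())
        }
    }
}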