From aabd4899cd936070b14fbd33298e6974f4660abe Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Mon, 22 Sep 2025 01:32:24 +0800 Subject: [PATCH 1/3] feat: include geo --- Cargo.lock | 391 +++++++++++++++++++++++++++++++-- datafusion-postgres/Cargo.toml | 5 + 2 files changed, 380 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 04ea368..7ea23d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -160,6 +160,15 @@ dependencies = [ "zstd", ] +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "array-init" version = "2.1.0" @@ -823,6 +832,25 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -955,7 +983,7 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "itertools", + "itertools 0.14.0", "log", "object_store", "parking_lot", @@ -990,7 +1018,7 @@ dependencies = [ "datafusion-session", "datafusion-sql", "futures", - "itertools", + "itertools 0.14.0", "log", "object_store", "parking_lot", @@ -1081,7 +1109,7 @@ dependencies = [ "flate2", "futures", "glob", - "itertools", + "itertools 0.14.0", "log", "object_store", "parquet", @@ -1193,7 +1221,7 @@ dependencies = [ "datafusion-pruning", "datafusion-session", "futures", - "itertools", + "itertools 0.14.0", "log", "object_store", "parking_lot", @@ -1259,7 +1287,7 @@ dependencies = [ "arrow", "datafusion-common", "indexmap", - "itertools", + "itertools 0.14.0", "paste", ] @@ -1282,7 +1310,7 @@ dependencies = [ "datafusion-expr-common", "datafusion-macros", "hex", - "itertools", + "itertools 0.14.0", "log", "md-5", "rand 0.9.2", @@ -1343,7 +1371,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", - "itertools", + "itertools 0.14.0", "log", "paste", ] @@ -1416,7 +1444,7 @@ dependencies = [ "datafusion-expr-common", "datafusion-physical-expr", "indexmap", - "itertools", + "itertools 0.14.0", "log", "recursive", "regex", @@ -1439,7 +1467,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "parking_lot", "paste", @@ -1458,7 +1486,7 @@ dependencies = [ "datafusion-functions", "datafusion-physical-expr", "datafusion-physical-expr-common", - "itertools", + "itertools 0.14.0", ] [[package]] @@ -1472,7 +1500,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "itertools", + "itertools 0.14.0", ] [[package]] @@ -1490,7 +1518,7 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-pruning", - "itertools", + "itertools 0.14.0", "log", "recursive", ] @@ -1519,7 +1547,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.14.0", "log", "parking_lot", "pin-project-lite", @@ -1537,6 +1565,7 @@ dependencies = [ "datafusion", "env_logger", "futures", + "geodatafusion", "getset", "log", "pgwire", @@ -1574,7 +1603,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "itertools", + "itertools 0.14.0", "log", ] @@ -1595,7 +1624,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-sql", "futures", - "itertools", + "itertools 0.14.0", "log", "object_store", "parking_lot", @@ -1662,6 +1691,16 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "earcutr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" +dependencies = [ + "itertools 0.11.0", + "num-traits", +] + [[package]] name = "either" version = "1.15.0" @@ -1746,6 +1785,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float_next_after" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" + [[package]] name = "fnv" version = "1.0.7" @@ -1872,6 +1917,153 @@ dependencies = [ "version_check", ] +[[package]] +name = "geo" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc1a1678e54befc9b4bcab6cd43b8e7f834ae8ea121118b0fd8c42747675b4a" +dependencies = [ + "earcutr", + "float_next_after", + "geo-types", + "geographiclib-rs", + "i_overlay", + "log", + "num-traits", + "robust", + "rstar", + "spade", +] + +[[package]] +name = "geo-traits" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" +dependencies = [ + "geo-types", +] + +[[package]] +name = "geo-types" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a4dcd69d35b2c87a7c83bce9af69fd65c9d68d3833a0ded568983928f3fc99" +dependencies = [ + "approx", + "num-traits", + "rayon", + "rstar", + "serde", +] + +[[package]] +name = "geoarrow-array" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73241361a33c1339883aa98d0655bd0dad53f760b9fe92ffbd1d5bbd87dc07bc" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "geo-traits", + "geoarrow-schema 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits", + "wkb", + "wkt", +] + +[[package]] +name = "geoarrow-array" +version = "0.5.0" +source = "git+https://github.com/geoarrow/geoarrow-rs?rev=61a535b072766003ad06a4d7a25dcc15f010e68f#61a535b072766003ad06a4d7a25dcc15f010e68f" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-schema", + "geo-traits", + "geoarrow-schema 0.5.0 (git+https://github.com/geoarrow/geoarrow-rs?rev=61a535b072766003ad06a4d7a25dcc15f010e68f)", + "num-traits", + "wkb", + "wkt", +] + +[[package]] +name = "geoarrow-geo" +version = "0.5.0" +source = "git+https://github.com/geoarrow/geoarrow-rs?rev=61a535b072766003ad06a4d7a25dcc15f010e68f#61a535b072766003ad06a4d7a25dcc15f010e68f" +dependencies = [ + "arrow-array", + "arrow-buffer", + "geo", + "geo-traits", + "geoarrow-array 0.5.0 (git+https://github.com/geoarrow/geoarrow-rs?rev=61a535b072766003ad06a4d7a25dcc15f010e68f)", + "geoarrow-schema 0.5.0 (git+https://github.com/geoarrow/geoarrow-rs?rev=61a535b072766003ad06a4d7a25dcc15f010e68f)", +] + +[[package]] +name = "geoarrow-schema" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5ebd2cbf841a1062d3521c2b2de7e819bb201b30f23381f872e62b9430b82af" +dependencies = [ + "arrow-schema", + "geo-traits", + "serde", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] +name = "geoarrow-schema" +version = "0.5.0" +source = "git+https://github.com/geoarrow/geoarrow-rs?rev=61a535b072766003ad06a4d7a25dcc15f010e68f#61a535b072766003ad06a4d7a25dcc15f010e68f" +dependencies = [ + "arrow-schema", + "geo-traits", + "serde", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] +name = "geodatafusion" +version = "0.1.0-dev" +source = "git+https://github.com/datafusion-contrib/datafusion-geo.git#171bdaf31069efe618371249e051e5b8eaa109d1" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-schema", + "datafusion", + "geo", + "geo-traits", + "geoarrow-array 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "geoarrow-geo", + "geoarrow-schema 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "geohash", + "thiserror 1.0.69", + "wkt", +] + +[[package]] +name = "geographiclib-rs" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" +dependencies = [ + "libm", +] + +[[package]] +name = "geohash" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" +dependencies = [ + "geo-types", + "libm", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -1932,6 +2124,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -1962,6 +2163,16 @@ dependencies = [ "foldhash", ] +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.3.3" @@ -2009,6 +2220,49 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" +[[package]] +name = "i_float" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "010025c2c532c8d82e42d0b8bb5184afa449fa6f06c709ea9adcb16c49ae405b" +dependencies = [ + "libm", +] + +[[package]] +name = "i_key_sort" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9190f86706ca38ac8add223b2aed8b1330002b5cdbbce28fb58b10914d38fc27" + +[[package]] +name = "i_overlay" +version = "4.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fcccbd4e4274e0f80697f5fbc6540fdac533cce02f2081b328e68629cce24f9" +dependencies = [ + "i_float", + "i_key_sort", + "i_shape", + "i_tree", + "rayon", +] + +[[package]] +name = "i_shape" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ea154b742f7d43dae2897fcd5ead86bc7b5eefcedd305a7ebf9f69d44d61082" +dependencies = [ + "i_float", +] + +[[package]] +name = "i_tree" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e6d558e6d4c7b82bc51d9c771e7a927862a161a7d87bf2b0541450e0e20915" + [[package]] name = "iana-time-zone" version = "0.1.63" @@ -2179,6 +2433,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -2523,6 +2786,28 @@ dependencies = [ "libm", ] +[[package]] +name = "num_enum" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "object" version = "0.36.7" @@ -2544,7 +2829,7 @@ dependencies = [ "futures", "http", "humantime", - "itertools", + "itertools 0.14.0", "parking_lot", "percent-encoding", "thiserror 2.0.16", @@ -2988,6 +3273,26 @@ dependencies = [ "getrandom 0.3.3", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "recursive" version = "0.1.1" @@ -3104,6 +3409,23 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "robust" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e27ee8bb91ca0adcf0ecb116293afa12d393f9c2b9b9cd54d33e8078fe19839" + +[[package]] +name = "rstar" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421400d13ccfd26dfa5858199c30a5d76f9c54e0dba7575273025b43c5175dbb" +dependencies = [ + "heapless", + "num-traits", + "smallvec", +] + [[package]] name = "rust_decimal" version = "1.38.0" @@ -3354,6 +3676,18 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "spade" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb313e1c8afee5b5647e00ee0fe6855e3d529eb863a0fdae1d60006c4d1e9990" +dependencies = [ + "hashbrown 0.15.5", + "num-traits", + "robust", + "smallvec", +] + [[package]] name = "spki" version = "0.7.3" @@ -4193,6 +4527,31 @@ dependencies = [ "bitflags 2.9.3", ] +[[package]] +name = "wkb" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff9eff6aebac4c64f9c7c057a68f6359284e2a80acf102dffe041fe219b3a082" +dependencies = [ + "byteorder", + "geo-traits", + "num_enum", + "thiserror 1.0.69", +] + +[[package]] +name = "wkt" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" +dependencies = [ + "geo-traits", + "geo-types", + "log", + "num-traits", + "thiserror 1.0.69", +] + [[package]] name = "writeable" version = "0.6.1" diff --git a/datafusion-postgres/Cargo.toml b/datafusion-postgres/Cargo.toml index 6f84537..7c8aaa2 100644 --- a/datafusion-postgres/Cargo.toml +++ b/datafusion-postgres/Cargo.toml @@ -24,6 +24,7 @@ bytes.workspace = true async-trait = "0.1" chrono.workspace = true datafusion.workspace = true +geodatafusion = { git = "https://github.com/datafusion-contrib/datafusion-geo.git", optional = true } futures.workspace = true getset = "0.1" log = "0.4" @@ -37,3 +38,7 @@ rustls-pki-types = "1.0" [dev-dependencies] env_logger = "0.11" + +[features] +default = ["geo"] +geo = ["geodatafusion"] From e7fde70524f2b826bc03b5cf07968a8c12355838 Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Sat, 25 Oct 2025 09:31:43 +0800 Subject: [PATCH 2/3] feat: geodatafusion integration --- datafusion-postgres/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datafusion-postgres/src/lib.rs b/datafusion-postgres/src/lib.rs index 4ced1fc..cc23f53 100644 --- a/datafusion-postgres/src/lib.rs +++ b/datafusion-postgres/src/lib.rs @@ -88,6 +88,10 @@ pub async fn serve( opts: &ServerOptions, auth_manager: Arc, ) -> Result<(), std::io::Error> { + #[cfg(feature = "geo")] + session_context + .register_udf(geodatafusion::udf::native::constructors::MakePoint::default().into()); + // Create the handler factory with authentication let factory = Arc::new(HandlerFactory::new(session_context, auth_manager)); From 5f6689982acefe6dabb89bb1b6387ee458660541 Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Sat, 1 Nov 2025 03:01:58 +0800 Subject: [PATCH 3/3] feat: introduce postgis functions --- Cargo.lock | 4 ++-- datafusion-postgres/Cargo.toml | 6 +++--- datafusion-postgres/src/lib.rs | 8 +++++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3c522b..ab3661f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2051,9 +2051,9 @@ dependencies = [ [[package]] name = "geodatafusion" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948d2db90029afd0b0678823e6c2c18bcc7ecb2f6260210941d6b7d1a1d551e" +checksum = "83d676b8d8b5f391ab4270ba31e9b599ee2c3d780405a38e272a0a7565ea189c" dependencies = [ "arrow-arith", "arrow-array", diff --git a/datafusion-postgres/Cargo.toml b/datafusion-postgres/Cargo.toml index 33a6a47..54e15e3 100644 --- a/datafusion-postgres/Cargo.toml +++ b/datafusion-postgres/Cargo.toml @@ -18,7 +18,7 @@ bytes.workspace = true async-trait = "0.1" chrono.workspace = true datafusion.workspace = true -geodatafusion = { version = "0.1", optional = true } +geodatafusion = { version = "0.1.1", optional = true } datafusion-pg-catalog = { path = "../datafusion-pg-catalog", version = "0.12.0" } futures.workspace = true getset = "0.1" @@ -35,5 +35,5 @@ rustls-pki-types = "1.0" env_logger = "0.11" [features] -default = ["geo"] -geo = ["geodatafusion"] +default = [] +postgis = ["geodatafusion"] diff --git a/datafusion-postgres/src/lib.rs b/datafusion-postgres/src/lib.rs index cc23f53..ba16938 100644 --- a/datafusion-postgres/src/lib.rs +++ b/datafusion-postgres/src/lib.rs @@ -88,9 +88,8 @@ pub async fn serve( opts: &ServerOptions, auth_manager: Arc, ) -> Result<(), std::io::Error> { - #[cfg(feature = "geo")] - session_context - .register_udf(geodatafusion::udf::native::constructors::MakePoint::default().into()); + #[cfg(feature = "postgis")] + geodatafusion::register(&session_context); // Create the handler factory with authentication let factory = Arc::new(HandlerFactory::new(session_context, auth_manager)); @@ -106,6 +105,9 @@ pub async fn serve_with_hooks( auth_manager: Arc, hooks: Vec>, ) -> Result<(), std::io::Error> { + #[cfg(feature = "postgis")] + geodatafusion::register(&session_context); + // Create the handler factory with authentication let factory = Arc::new(HandlerFactory::new_with_hooks( session_context,