diff --git a/Cargo.lock b/Cargo.lock index 3b2e73c..5d8cb37 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -497,18 +497,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "console" -version = "0.15.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" -dependencies = [ - "encode_unicode", - "libc", - "once_cell", - "windows-sys 0.59.0", -] - [[package]] name = "const-random" version = "0.1.18" @@ -1278,10 +1266,8 @@ version = "0.1.0" dependencies = [ "anyhow", "arrow", - "datafusion", "datafusion-common", "datafusion-expr", - "datafusion-sql", "datafusion-udf-wasm-arrow2bytes", "datafusion-udf-wasm-bundle", "http", @@ -1289,7 +1275,6 @@ dependencies = [ "insta", "rand 0.9.2", "siphasher", - "sqlparser", "tar", "tokio", "wasmtime", @@ -1314,6 +1299,21 @@ dependencies = [ "wasip2", ] +[[package]] +name = "datafusion-udf-wasm-query" +version = "0.1.0" +dependencies = [ + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-sql", + "datafusion-udf-wasm-bundle", + "datafusion-udf-wasm-host", + "insta", + "sqlparser", + "tokio", +] + [[package]] name = "deadpool" version = "0.12.3" @@ -1361,12 +1361,6 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" -[[package]] -name = "encode_unicode" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" - [[package]] name = "encoding_rs" version = "0.8.35" @@ -1936,7 +1930,6 @@ version = "1.43.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" dependencies = [ - "console", "once_cell", "similar", ] diff --git a/Cargo.toml b/Cargo.toml index af931cc..b6370c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "guests/python", "guests/rust", "host", + "query", ] [workspace.package] @@ -24,9 +25,12 @@ datafusion-sql = { version = "49.0.1", default-features = false } datafusion-udf-wasm-arrow2bytes = { path = "arrow2bytes", version = "0.1.0" } datafusion-udf-wasm-bundle = { path = "guests/bundle", version = "0.1.0" } datafusion-udf-wasm-guest = { path = "guests/rust", version = "0.1.0" } +datafusion-udf-wasm-host = { path = "host", version = "0.1.0" } datafusion-udf-wasm-python = { path = "guests/python", version = "0.1.0" } +datafusion-udf-wasm-query = { path = "query", version = "0.1.0" } http = { version = "1.3.1", default-features = false } hyper = { version = "1.7", default-features = false } +insta = { version = "1.43.2", "default-features" = false } pyo3 = { version = "0.27.1", default-features = false, features = ["macros"] } sqlparser = { version = "0.55.0", default-features = false, features = [ "std", diff --git a/host/Cargo.toml b/host/Cargo.toml index f1bc812..bcaceac 100644 --- a/host/Cargo.toml +++ b/host/Cargo.toml @@ -7,16 +7,13 @@ license.workspace = true [dependencies] anyhow.workspace = true arrow.workspace = true -datafusion.workspace = true datafusion-common.workspace = true datafusion-expr.workspace = true -datafusion-sql.workspace = true datafusion-udf-wasm-arrow2bytes.workspace = true http.workspace = true hyper.workspace = true rand = { version = "0.9" } siphasher = { version = "1", default-features = false } -sqlparser.workspace = true tar.workspace = true tokio = { workspace = true, features = ["rt", "rt-multi-thread", "sync"] } wasmtime.workspace = true @@ -29,7 +26,7 @@ datafusion-udf-wasm-bundle = { workspace = true, features = [ "example", "python" ] } -insta = "1.43.2" +insta.workspace = true tokio = { workspace = true, features = ["fs", "macros"] } wiremock = "0.6.5" diff --git a/host/src/lib.rs b/host/src/lib.rs index a833993..841e046 100644 --- a/host/src/lib.rs +++ b/host/src/lib.rs @@ -47,7 +47,6 @@ mod error; pub mod http; mod linker; mod tokio_helpers; -pub mod udf_query; pub mod vfs; /// State of the WASM payload. diff --git a/host/tests/integration_tests/mod.rs b/host/tests/integration_tests/mod.rs index 9d0eec3..f0b8705 100644 --- a/host/tests/integration_tests/mod.rs +++ b/host/tests/integration_tests/mod.rs @@ -1,4 +1,3 @@ mod python; mod rust; mod test_utils; -mod udf_query; diff --git a/host/tests/integration_tests/python/inspection/errors.rs b/host/tests/integration_tests/python/inspection/errors.rs index c76e436..245e40b 100644 --- a/host/tests/integration_tests/python/inspection/errors.rs +++ b/host/tests/integration_tests/python/inspection/errors.rs @@ -1,6 +1,5 @@ -use datafusion_common::DataFusionError; - use crate::integration_tests::python::test_utils::python_scalar_udfs; +use datafusion_common::DataFusionError; #[tokio::test(flavor = "multi_thread")] async fn test_invalid_syntax() { diff --git a/host/tests/integration_tests/python/inspection/filter.rs b/host/tests/integration_tests/python/inspection/filter.rs index c48c624..b6f4362 100644 --- a/host/tests/integration_tests/python/inspection/filter.rs +++ b/host/tests/integration_tests/python/inspection/filter.rs @@ -1,6 +1,5 @@ -use datafusion_expr::ScalarUDFImpl; - use crate::integration_tests::python::test_utils::python_scalar_udfs; +use datafusion_expr::ScalarUDFImpl; #[tokio::test(flavor = "multi_thread")] async fn test_underscore() { diff --git a/host/tests/integration_tests/python/mod.rs b/host/tests/integration_tests/python/mod.rs index 0cb865f..328379d 100644 --- a/host/tests/integration_tests/python/mod.rs +++ b/host/tests/integration_tests/python/mod.rs @@ -3,5 +3,5 @@ mod examples; mod inspection; mod runtime; mod state; -pub(crate) mod test_utils; +mod test_utils; mod types; diff --git a/host/tests/integration_tests/python/state.rs b/host/tests/integration_tests/python/state.rs index 0443f87..6d91a81 100644 --- a/host/tests/integration_tests/python/state.rs +++ b/host/tests/integration_tests/python/state.rs @@ -5,16 +5,15 @@ use std::sync::Arc; +use crate::integration_tests::{ + python::test_utils::python_scalar_udfs, test_utils::ColumnarValueExt, +}; use arrow::{ array::{Array, ArrayRef, Int64Array}, datatypes::{DataType, Field}, }; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl}; -use crate::integration_tests::{ - python::test_utils::python_scalar_udfs, test_utils::ColumnarValueExt, -}; - const CODE: &str = " # Use system module to store our state. # diff --git a/host/tests/integration_tests/python/test_utils.rs b/host/tests/integration_tests/python/test_utils.rs index 0fbbba7..0fb43e3 100644 --- a/host/tests/integration_tests/python/test_utils.rs +++ b/host/tests/integration_tests/python/test_utils.rs @@ -2,8 +2,10 @@ use datafusion_common::DataFusionError; use datafusion_udf_wasm_host::{WasmComponentPrecompiled, WasmScalarUdf}; use tokio::sync::OnceCell; +/// Static precompiled Python WASM component for tests static COMPONENT: OnceCell = OnceCell::const_new(); +/// Returns a static reference to the precompiled Python WASM component. pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled { COMPONENT .get_or_init(async || { @@ -14,12 +16,14 @@ pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled { .await } +/// Compiles the provided Python UDF code into a list of WasmScalarUdf instances. pub(crate) async fn python_scalar_udfs(code: &str) -> Result, DataFusionError> { let component = python_component().await; WasmScalarUdf::new(component, &Default::default(), code.to_owned()).await } +/// Compiles the provided Python UDF code into a single WasmScalarUdf instance. pub(crate) async fn python_scalar_udf(code: &str) -> Result { let udfs = python_scalar_udfs(code).await?; assert_eq!(udfs.len(), 1); diff --git a/query/Cargo.toml b/query/Cargo.toml new file mode 100644 index 0000000..e633b7e --- /dev/null +++ b/query/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "datafusion-udf-wasm-query" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +datafusion.workspace = true +datafusion-common.workspace = true +datafusion-expr.workspace = true +datafusion-sql.workspace = true +datafusion-udf-wasm-host.workspace = true +sqlparser.workspace = true +tokio.workspace = true + +[dev-dependencies] +datafusion-udf-wasm-bundle = { workspace = true, features = ["python"] } +datafusion-udf-wasm-host = { workspace = true } +insta.workspace = true + +[lints] +workspace = true diff --git a/host/src/udf_query.rs b/query/src/lib.rs similarity index 97% rename from host/src/udf_query.rs rename to query/src/lib.rs index 13f8ded..9ac2c88 100644 --- a/host/src/udf_query.rs +++ b/query/src/lib.rs @@ -1,4 +1,5 @@ //! Embedded SQL approach for executing UDFs within SQL queries. +#![allow(unused_crate_dependencies)] use std::collections::HashMap; @@ -8,7 +9,7 @@ use datafusion_sql::parser::{DFParserBuilder, Statement}; use sqlparser::ast::{CreateFunctionBody, Expr, Statement as SqlStatement, Value}; use sqlparser::dialect::dialect_from_str; -use crate::{WasmComponentPrecompiled, WasmPermissions, WasmScalarUdf}; +use datafusion_udf_wasm_host::{WasmComponentPrecompiled, WasmPermissions, WasmScalarUdf}; /// A [ParsedQuery] contains the extracted UDFs and SQL query string #[derive(Debug)] diff --git a/host/tests/integration_tests/udf_query.rs b/query/tests/integration.rs similarity index 95% rename from host/tests/integration_tests/udf_query.rs rename to query/tests/integration.rs index eff286a..6af2274 100644 --- a/host/tests/integration_tests/udf_query.rs +++ b/query/tests/integration.rs @@ -1,3 +1,10 @@ +#![expect( + // Docs are not strictly required for tests. + missing_docs, + // unused-crate-dependencies false positives + unused_crate_dependencies, +)] + use std::collections::HashMap; use datafusion::{ @@ -5,10 +12,10 @@ use datafusion::{ prelude::{DataFrame, SessionContext}, }; use datafusion_common::{Result as DataFusionResult, test_util::batches_to_string}; -use datafusion_udf_wasm_host::{ - WasmPermissions, - udf_query::{ParsedQuery, UdfQueryParser}, -}; +use datafusion_udf_wasm_host::WasmPermissions; +use datafusion_udf_wasm_query::{ParsedQuery, UdfQueryParser}; + +mod integration_tests; use crate::integration_tests::python::test_utils::python_component; diff --git a/query/tests/integration_tests/mod.rs b/query/tests/integration_tests/mod.rs new file mode 100644 index 0000000..fb20714 --- /dev/null +++ b/query/tests/integration_tests/mod.rs @@ -0,0 +1 @@ +pub(crate) mod python; diff --git a/query/tests/integration_tests/python/mod.rs b/query/tests/integration_tests/python/mod.rs new file mode 100644 index 0000000..1d00cc7 --- /dev/null +++ b/query/tests/integration_tests/python/mod.rs @@ -0,0 +1 @@ +pub(crate) mod test_utils; diff --git a/query/tests/integration_tests/python/test_utils.rs b/query/tests/integration_tests/python/test_utils.rs new file mode 100644 index 0000000..673281b --- /dev/null +++ b/query/tests/integration_tests/python/test_utils.rs @@ -0,0 +1,16 @@ +use datafusion_udf_wasm_host::WasmComponentPrecompiled; +use tokio::sync::OnceCell; + +/// Static precompiled Python WASM component for tests +static COMPONENT: OnceCell = OnceCell::const_new(); + +/// Returns a static reference to the precompiled Python WASM component. +pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled { + COMPONENT + .get_or_init(async || { + WasmComponentPrecompiled::new(datafusion_udf_wasm_bundle::BIN_PYTHON.into()) + .await + .unwrap() + }) + .await +}