Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 15 additions & 22 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ members = [
"guests/python",
"guests/rust",
"host",
"query",
]

[workspace.package]
Expand All @@ -24,9 +25,12 @@ datafusion-sql = { version = "49.0.1", default-features = false }
datafusion-udf-wasm-arrow2bytes = { path = "arrow2bytes", version = "0.1.0" }
datafusion-udf-wasm-bundle = { path = "guests/bundle", version = "0.1.0" }
datafusion-udf-wasm-guest = { path = "guests/rust", version = "0.1.0" }
datafusion-udf-wasm-host = { path = "host", version = "0.1.0" }
datafusion-udf-wasm-python = { path = "guests/python", version = "0.1.0" }
datafusion-udf-wasm-query = { path = "query", version = "0.1.0" }
http = { version = "1.3.1", default-features = false }
hyper = { version = "1.7", default-features = false }
insta = { version = "1.43.2", "default-features" = false }
pyo3 = { version = "0.27.1", default-features = false, features = ["macros"] }
sqlparser = { version = "0.55.0", default-features = false, features = [
"std",
Expand Down
5 changes: 1 addition & 4 deletions host/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,13 @@ license.workspace = true
[dependencies]
anyhow.workspace = true
arrow.workspace = true
datafusion.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
datafusion-sql.workspace = true
datafusion-udf-wasm-arrow2bytes.workspace = true
http.workspace = true
hyper.workspace = true
rand = { version = "0.9" }
siphasher = { version = "1", default-features = false }
sqlparser.workspace = true
tar.workspace = true
tokio = { workspace = true, features = ["rt", "rt-multi-thread", "sync"] }
wasmtime.workspace = true
Expand All @@ -29,7 +26,7 @@ datafusion-udf-wasm-bundle = { workspace = true, features = [
"example",
"python"
] }
insta = "1.43.2"
insta.workspace = true
tokio = { workspace = true, features = ["fs", "macros"] }
wiremock = "0.6.5"

Expand Down
1 change: 0 additions & 1 deletion host/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ mod error;
pub mod http;
mod linker;
mod tokio_helpers;
pub mod udf_query;
pub mod vfs;

/// State of the WASM payload.
Expand Down
1 change: 0 additions & 1 deletion host/tests/integration_tests/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
mod python;
mod rust;
mod test_utils;
mod udf_query;
3 changes: 1 addition & 2 deletions host/tests/integration_tests/python/inspection/errors.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use datafusion_common::DataFusionError;

use crate::integration_tests::python::test_utils::python_scalar_udfs;
use datafusion_common::DataFusionError;

#[tokio::test(flavor = "multi_thread")]
async fn test_invalid_syntax() {
Expand Down
3 changes: 1 addition & 2 deletions host/tests/integration_tests/python/inspection/filter.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use datafusion_expr::ScalarUDFImpl;

use crate::integration_tests::python::test_utils::python_scalar_udfs;
use datafusion_expr::ScalarUDFImpl;

#[tokio::test(flavor = "multi_thread")]
async fn test_underscore() {
Expand Down
2 changes: 1 addition & 1 deletion host/tests/integration_tests/python/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ mod examples;
mod inspection;
mod runtime;
mod state;
pub(crate) mod test_utils;
mod test_utils;
mod types;
7 changes: 3 additions & 4 deletions host/tests/integration_tests/python/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@

use std::sync::Arc;

use crate::integration_tests::{
python::test_utils::python_scalar_udfs, test_utils::ColumnarValueExt,
};
use arrow::{
array::{Array, ArrayRef, Int64Array},
datatypes::{DataType, Field},
};
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl};

use crate::integration_tests::{
python::test_utils::python_scalar_udfs, test_utils::ColumnarValueExt,
};

const CODE: &str = "
# Use system module to store our state.
#
Expand Down
4 changes: 4 additions & 0 deletions host/tests/integration_tests/python/test_utils.rs
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not gonna fly as prod code. I don't think it's that much code though, so you could just copy whatever you need in query.

Copy link
Contributor Author

@Sl1mb0 Sl1mb0 Nov 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this bad? Not arguing at all just genuinely curious as I don't see the problem ATM. Is it because we want host to be completely language-agnostic?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. this code is language-agnostic
  2. this code is esp. guest-agnostic
  3. compiling the host should require you to bundle/compile all kinds of guests. The bundle crate is merely a helper for tests or for users that want an easy way to pull in the guests that we provide in this repo, but there's no hard requirement to use the guests that we provide

Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ use datafusion_common::DataFusionError;
use datafusion_udf_wasm_host::{WasmComponentPrecompiled, WasmScalarUdf};
use tokio::sync::OnceCell;

/// Static precompiled Python WASM component for tests
static COMPONENT: OnceCell<WasmComponentPrecompiled> = OnceCell::const_new();

/// Returns a static reference to the precompiled Python WASM component.
pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled {
COMPONENT
.get_or_init(async || {
Expand All @@ -14,12 +16,14 @@ pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled {
.await
}

/// Compiles the provided Python UDF code into a list of WasmScalarUdf instances.
pub(crate) async fn python_scalar_udfs(code: &str) -> Result<Vec<WasmScalarUdf>, DataFusionError> {
let component = python_component().await;

WasmScalarUdf::new(component, &Default::default(), code.to_owned()).await
}

/// Compiles the provided Python UDF code into a single WasmScalarUdf instance.
pub(crate) async fn python_scalar_udf(code: &str) -> Result<WasmScalarUdf, DataFusionError> {
let udfs = python_scalar_udfs(code).await?;
assert_eq!(udfs.len(), 1);
Expand Down
22 changes: 22 additions & 0 deletions query/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[package]
name = "datafusion-udf-wasm-query"
version.workspace = true
edition.workspace = true
license.workspace = true

[dependencies]
datafusion.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
datafusion-sql.workspace = true
datafusion-udf-wasm-host.workspace = true
sqlparser.workspace = true
tokio.workspace = true

[dev-dependencies]
datafusion-udf-wasm-bundle = { workspace = true, features = ["python"] }
datafusion-udf-wasm-host = { workspace = true }
insta.workspace = true

[lints]
workspace = true
3 changes: 2 additions & 1 deletion host/src/udf_query.rs → query/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//! Embedded SQL approach for executing UDFs within SQL queries.
#![allow(unused_crate_dependencies)]

use std::collections::HashMap;

Expand All @@ -8,7 +9,7 @@ use datafusion_sql::parser::{DFParserBuilder, Statement};
use sqlparser::ast::{CreateFunctionBody, Expr, Statement as SqlStatement, Value};
use sqlparser::dialect::dialect_from_str;

use crate::{WasmComponentPrecompiled, WasmPermissions, WasmScalarUdf};
use datafusion_udf_wasm_host::{WasmComponentPrecompiled, WasmPermissions, WasmScalarUdf};

/// A [ParsedQuery] contains the extracted UDFs and SQL query string
#[derive(Debug)]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
#![expect(
// Docs are not strictly required for tests.
missing_docs,
// unused-crate-dependencies false positives
unused_crate_dependencies,
)]

use std::collections::HashMap;

use datafusion::{
assert_batches_eq,
prelude::{DataFrame, SessionContext},
};
use datafusion_common::{Result as DataFusionResult, test_util::batches_to_string};
use datafusion_udf_wasm_host::{
WasmPermissions,
udf_query::{ParsedQuery, UdfQueryParser},
};
use datafusion_udf_wasm_host::WasmPermissions;
use datafusion_udf_wasm_query::{ParsedQuery, UdfQueryParser};

mod integration_tests;

use crate::integration_tests::python::test_utils::python_component;

Expand Down
1 change: 1 addition & 0 deletions query/tests/integration_tests/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub(crate) mod python;
1 change: 1 addition & 0 deletions query/tests/integration_tests/python/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub(crate) mod test_utils;
16 changes: 16 additions & 0 deletions query/tests/integration_tests/python/test_utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use datafusion_udf_wasm_host::WasmComponentPrecompiled;
use tokio::sync::OnceCell;

/// Static precompiled Python WASM component for tests
static COMPONENT: OnceCell<WasmComponentPrecompiled> = OnceCell::const_new();

/// Returns a static reference to the precompiled Python WASM component.
pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled {
COMPONENT
.get_or_init(async || {
WasmComponentPrecompiled::new(datafusion_udf_wasm_bundle::BIN_PYTHON.into())
.await
.unwrap()
})
.await
}