Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
1360eba
feat(sql): Add SRID argument to ST_Point()
yutannihilation Nov 4, 2025
ab65370
Update rust/sedona-functions/src/st_setsrid.rs
yutannihilation Nov 6, 2025
7c32e6a
Merge remote-tracking branch 'upstream/main' into feat/srid-kernel
yutannihilation Nov 6, 2025
ead1ea0
Tweak
yutannihilation Nov 7, 2025
272cc62
Add return_type_with_scalar()...
yutannihilation Nov 7, 2025
64e7d13
WIP
yutannihilation Nov 7, 2025
9b6f208
Revert "WIP"
yutannihilation Nov 7, 2025
23d9095
Check length
yutannihilation Nov 7, 2025
39fd6a7
Specify return type when invoke
yutannihilation Nov 8, 2025
7ce3160
Tweak
yutannihilation Nov 8, 2025
5feefc0
Inline variables
yutannihilation Nov 8, 2025
00cc68d
Add some comments
yutannihilation Nov 8, 2025
7e90da3
Add more test cases
yutannihilation Nov 8, 2025
9ebca35
Add a test case of invalid SRID
yutannihilation Nov 8, 2025
1bc5611
Add Python test
yutannihilation Nov 8, 2025
21e0a21
Tweak test cases (not working yet)
yutannihilation Nov 8, 2025
91fcf64
Add TODO comment and skip failing test
yutannihilation Nov 8, 2025
617f7b8
Add some comment
yutannihilation Nov 8, 2025
6f3fab9
Address comments
yutannihilation Nov 8, 2025
3b3ecb1
Tweak
yutannihilation Nov 8, 2025
8eab637
Fix
yutannihilation Nov 8, 2025
5ffdb4f
Improve
yutannihilation Nov 8, 2025
a6d6be0
Do not validate CRS when the inner UDF's return type is None
yutannihilation Nov 9, 2025
efaf059
Propagate errors even when CRS is NULL
yutannihilation Nov 9, 2025
d25849a
Refer to the result's length
yutannihilation Nov 9, 2025
e0fc862
Remove unused imports
yutannihilation Nov 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions python/sedonadb/tests/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,25 @@ def test_st_point(eng, x, y, expected):
)


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("x", "y", "srid", "expected"),
[
(None, None, None, None),
(1, 1, None, None),
(1, 1, 0, 0),
(1, 1, 4326, 4326),
(1, 1, "4326", 4326),
],
)
def test_st_point_with_srid(eng, x, y, srid, expected):
eng = eng.create_or_skip()
eng.assert_query_result(
f"SELECT ST_SRID(ST_Point({val_or_null(x)}, {val_or_null(y)}, {val_or_null(srid)}))",
expected,
)


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("x", "y", "z", "expected"),
Expand Down
74 changes: 67 additions & 7 deletions rust/sedona-functions/src/st_point.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,21 @@ use sedona_schema::{
matchers::ArgMatcher,
};

use crate::executor::WkbExecutor;
use crate::{executor::WkbExecutor, st_setsrid::SRIDifiedKernel};

/// ST_Point() scalar UDF implementation
///
/// Native implementation to create geometries from coordinates.
/// See [`st_geogpoint_udf`] for the corresponding geography constructor.
pub fn st_point_udf() -> SedonaScalarUDF {
let kernel = Arc::new(STGeoFromPoint {
out_type: WKB_GEOMETRY,
});
let sridified_kernel = Arc::new(SRIDifiedKernel::new(kernel.clone()));

SedonaScalarUDF::new(
"st_point",
vec![Arc::new(STGeoFromPoint {
out_type: WKB_GEOMETRY,
})],
vec![sridified_kernel, kernel],
Volatility::Immutable,
Some(doc("ST_Point", "Geometry")),
)
Expand All @@ -52,11 +55,14 @@ pub fn st_point_udf() -> SedonaScalarUDF {
/// Native implementation to create geometries from coordinates.
/// See [`st_geogpoint_udf`] for the corresponding geography constructor.
pub fn st_geogpoint_udf() -> SedonaScalarUDF {
let kernel = Arc::new(STGeoFromPoint {
out_type: WKB_GEOGRAPHY,
});
let sridified_kernel = Arc::new(SRIDifiedKernel::new(kernel.clone()));

SedonaScalarUDF::new(
"st_geogpoint",
vec![Arc::new(STGeoFromPoint {
out_type: WKB_GEOGRAPHY,
})],
vec![sridified_kernel, kernel],
Volatility::Immutable,
Some(doc("st_geogpoint", "Geography")),
)
Expand All @@ -73,6 +79,7 @@ fn doc(name: &str, out_type_name: &str) -> Documentation {
)
.with_argument("x", "double: X value")
.with_argument("y", "double: Y value")
.with_argument("srid", "srid: EPSG code to set (e.g., 4326)")
.with_sql_example(format!("{name}(-64.36, 45.09)"))
.build()
}
Expand Down Expand Up @@ -157,8 +164,11 @@ mod tests {
use arrow_array::create_array;
use arrow_array::ArrayRef;
use arrow_schema::DataType;
use datafusion_expr::Literal;
use datafusion_expr::ScalarUDF;
use rstest::rstest;
use sedona_schema::crs::lnglat;
use sedona_schema::datatypes::Edges;
use sedona_testing::compare::assert_array_equal;
use sedona_testing::{create::create_array, testers::ScalarUdfTester};

Expand Down Expand Up @@ -247,6 +257,56 @@ mod tests {
);
}

#[rstest]
#[case(DataType::UInt32, 4326)]
#[case(DataType::Int32, 4326)]
#[case(DataType::Utf8, "4326")]
#[case(DataType::Utf8, "EPSG:4326")]
fn udf_invoke_with_srid(#[case] srid_type: DataType, #[case] srid_value: impl Literal + Copy) {
let udf = st_point_udf();
let tester = ScalarUdfTester::new(
udf.into(),
vec![
SedonaType::Arrow(DataType::Float64),
SedonaType::Arrow(DataType::Float64),
SedonaType::Arrow(srid_type),
],
);

let return_type = tester
.return_type_with_scalar_scalar_scalar(Some(1.0), Some(2.0), Some(srid_value))
.unwrap();
assert_eq!(return_type, SedonaType::Wkb(Edges::Planar, lnglat()));

let result = tester
.invoke_scalar_scalar_scalar(1.0, 2.0, srid_value)
.unwrap();
tester.assert_scalar_result_equals_with_return_type(result, "POINT (1 2)", return_type);
}

#[test]
fn udf_invoke_with_invalid_srid() {
let udf = st_point_udf();
let tester = ScalarUdfTester::new(
udf.into(),
vec![
SedonaType::Arrow(DataType::Float64),
SedonaType::Arrow(DataType::Float64),
SedonaType::Arrow(DataType::Utf8),
],
);

let return_type = tester.return_type_with_scalar_scalar_scalar(
Some(1.0),
Some(2.0),
Some("gazornenplat"),
);
assert!(return_type.is_err());

let result = tester.invoke_scalar_scalar_scalar(1.0, 2.0, "gazornenplat");
assert!(result.is_err());
}

#[test]
fn geog() {
let udf = st_geogpoint_udf();
Expand Down
116 changes: 115 additions & 1 deletion rust/sedona-functions/src/st_setsrid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@
// under the License.
use std::{sync::Arc, vec};

use arrow_array::builder::BinaryBuilder;
use arrow_schema::DataType;
use datafusion_common::{error::Result, DataFusionError, ScalarValue};
use datafusion_expr::{
scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility,
};
use sedona_common::sedona_internal_err;
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel, SedonaScalarUDF};
use sedona_geometry::transform::CrsEngine;
use sedona_schema::{crs::deserialize_crs, datatypes::SedonaType, matchers::ArgMatcher};

Expand Down Expand Up @@ -227,6 +228,119 @@ fn determine_return_type(
sedona_internal_err!("Unexpected argument types: {}, {}", args[0], args[1])
}

/// [SedonaScalarKernel] wrapper that handles the SRID argument for constructors like ST_Point
#[derive(Debug)]
pub(crate) struct SRIDifiedKernel {
inner: ScalarKernelRef,
}

impl SRIDifiedKernel {
pub(crate) fn new(inner: ScalarKernelRef) -> Self {
Self { inner }
}
}

impl SedonaScalarKernel for SRIDifiedKernel {
fn return_type_from_args_and_scalars(
&self,
args: &[SedonaType],
scalar_args: &[Option<&ScalarValue>],
) -> Result<Option<SedonaType>> {
// args should consist of the original args and one extra arg for
// specifying CRS. So, first, validate the length and separate these.
//
// [arg0, arg1, ..., crs_arg];
// ^^^^^^^^^^^^^^^
// orig_args
let orig_args_len = match (args.len(), scalar_args.len()) {
(0, 0) => return Ok(None),
(l1, l2) if l1 == l2 => l1 - 1,
_ => return sedona_internal_err!("Arg types and arg values have different lengths"),
};

let orig_args = &args[..orig_args_len];
let orig_scalar_args = &scalar_args[..orig_args_len];
Comment on lines +249 to +262
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I understand this better now...this works, although probably if args.len() == 0 { return Ok(None) } is sufficient (there are a lot of places in our code where we rely on DataFusion passing us the right number of things). I was worried before that if somebody passed (e.g.,) a single argument to ST_Point() something funny would happen here, but I see now that the call to the inner.return_type_from_args_and_scalars() will return correctly return Ok(None) for that case.

Totally optional, but the error message for something like ST_Point('gazornenplat') would probably be better if you moved the call to inner.return_type_from_args_and_scalars() before the CRS parsing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, sounds good to me.


// Invoke the original return_type_from_args_and_scalars() first before checking the CRS argument
let mut inner_result = match self
.inner
.return_type_from_args_and_scalars(orig_args, orig_scalar_args)?
{
Some(sedona_type) => sedona_type,
// if no match, quit here. Since the CRS arg is also an unintended
// one, validating it would be a cryptic error to the user.
None => return Ok(None),
};

let crs = match scalar_args[orig_args_len] {
Some(crs) => crs,
None => return Ok(None),
};
let new_crs = match crs.cast_to(&DataType::Utf8) {
Ok(ScalarValue::Utf8(Some(crs))) => {
if crs == "0" {
None
} else {
validate_crs(&crs, None)?;
deserialize_crs(&serde_json::Value::String(crs))?
}
}
Ok(ScalarValue::Utf8(None)) => None,
Ok(_) | Err(_) => return sedona_internal_err!("Can't cast Crs {crs:?} to Utf8"),
};

match &mut inner_result {
SedonaType::Wkb(_, crs) => *crs = new_crs,
SedonaType::WkbView(_, crs) => *crs = new_crs,
_ => {
return sedona_internal_err!("Return type must be Wkb or WkbView");
}
}

Ok(Some(inner_result))
}

fn invoke_batch(
&self,
arg_types: &[SedonaType],
args: &[ColumnarValue],
) -> Result<ColumnarValue> {
let orig_args_len = arg_types.len() - 1;
let orig_arg_types = &arg_types[..orig_args_len];
let orig_args = &args[..orig_args_len];

// Invoke the inner UDF first to propagate any errors even when the CRS is NULL.
// Note that, this behavior is different from PostGIS.
let result = self.inner.invoke_batch(orig_arg_types, orig_args)?;

// If the specified SRID is NULL, the result is also NULL.
if let ColumnarValue::Scalar(sc) = &args[orig_args_len] {
if sc.is_null() {
// Create the same length of NULLs as the original result.
let len = match &result {
ColumnarValue::Array(array) => array.len(),
ColumnarValue::Scalar(_) => 1,
};

let mut builder = BinaryBuilder::with_capacity(len, 0);
for _ in 0..len {
builder.append_null();
}
let new_array = builder.finish();
return Ok(ColumnarValue::Array(Arc::new(new_array)));
}
}

Ok(result)
}

fn return_type(&self, _args: &[SedonaType]) -> Result<Option<SedonaType>> {
sedona_internal_err!(
"Should not be called because return_type_from_args_and_scalars() is implemented"
)
}
}

#[cfg(test)]
mod test {
use std::rc::Rc;
Expand Down
Loading