diff --git a/c/sedona-geos/benches/geos-functions.rs b/c/sedona-geos/benches/geos-functions.rs index 34382bd9..d9e6ae0e 100644 --- a/c/sedona-geos/benches/geos-functions.rs +++ b/c/sedona-geos/benches/geos-functions.rs @@ -295,6 +295,8 @@ fn criterion_benchmark(c: &mut Criterion) { "st_overlaps", ArrayScalar(Polygon(10), Polygon(500)), ); + benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(10)); + benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(500)); } criterion_group!(benches, criterion_benchmark); diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs index 74d57b94..4314c4bf 100644 --- a/c/sedona-geos/src/lib.rs +++ b/c/sedona-geos/src/lib.rs @@ -25,6 +25,7 @@ mod st_buffer; mod st_centroid; mod st_convexhull; mod st_dwithin; +mod st_isvalid; mod st_length; mod st_perimeter; pub mod wkb_to_geos; diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs index f349f7e0..6025b463 100644 --- a/c/sedona-geos/src/register.rs +++ b/c/sedona-geos/src/register.rs @@ -17,6 +17,7 @@ use sedona_expr::scalar_udf::ScalarKernelRef; use crate::st_convexhull::st_convex_hull_impl; +use crate::st_isvalid::st_is_valid_impl; use crate::{ distance::st_distance_impl, st_area::st_area_impl, st_buffer::st_buffer_impl, st_centroid::st_centroid_impl, st_dwithin::st_dwithin_impl, st_length::st_length_impl, @@ -56,5 +57,6 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> { ("st_within", st_within_impl()), ("st_crosses", st_crosses_impl()), ("st_overlaps", st_overlaps_impl()), + ("st_isvalid", st_is_valid_impl()), ] } diff --git a/c/sedona-geos/src/st_isvalid.rs b/c/sedona-geos/src/st_isvalid.rs new file mode 100644 index 00000000..3756a102 --- /dev/null +++ b/c/sedona-geos/src/st_isvalid.rs @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::sync::Arc; + +use arrow_array::builder::BooleanBuilder; +use arrow_schema::DataType; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; +use geos::Geom; +use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +use crate::executor::GeosExecutor; + +/// ST_IsValid() implementation using the geos crate +pub fn st_is_valid_impl() -> ScalarKernelRef { + Arc::new(STIsValid {}) +} + +#[derive(Debug)] +struct STIsValid {} + +impl SedonaScalarKernel for STIsValid { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry()], + SedonaType::Arrow(DataType::Boolean), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = GeosExecutor::new(arg_types, args); + let mut builder = BooleanBuilder::with_capacity(executor.num_iterations()); + executor.execute_wkb_void(|maybe_wkb| { + match maybe_wkb { + Some(wkb) => { + builder.append_value(invoke_scalar(&wkb)); + } + _ => builder.append_null(), + } + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +fn invoke_scalar(geos_geom: &geos::Geometry) -> bool { + geos_geom.is_valid() +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow_array::{ArrayRef, BooleanArray}; + use arrow_schema::DataType; + use datafusion_common::ScalarValue; + use rstest::rstest; + use sedona_expr::scalar_udf::SedonaScalarUDF; + use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY}; + use sedona_testing::testers::ScalarUdfTester; + + use super::*; + + #[rstest] + fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { + let udf = SedonaScalarUDF::from_kernel("st_isvalid", st_is_valid_impl()); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + tester.assert_return_type(DataType::Boolean); + + // Valid polygon + let result = tester + .invoke_scalar("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))") + .unwrap(); + tester.assert_scalar_result_equals(result, true); + + // Invalid polygon (self-intersecting) + let result = tester + .invoke_scalar("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))") + .unwrap(); + tester.assert_scalar_result_equals(result, false); + + let result = tester.invoke_scalar(ScalarValue::Null).unwrap(); + assert!(result.is_null()); + + let input_wkt = vec![ + None, + Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"), + Some("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))"), + Some("LINESTRING (0 0, 1 1)"), + Some("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))"), + ]; + + let expected: ArrayRef = Arc::new(BooleanArray::from(vec![ + None, + Some(true), + Some(false), + Some(true), + Some(false), + ])); + assert_eq!(&tester.invoke_wkb_array(input_wkt).unwrap(), &expected); + } +} diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 5ddd5f9f..e638134b 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -211,6 +211,63 @@ def test_st_centroid(eng, geom, expected): eng.assert_query_result(f"SELECT ST_Centroid({geom_or_null(geom)})", expected) +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom", "expected"), + [ + (None, None), + ("POINT (0 0)", True), + ("POINT EMPTY", True), + ("LINESTRING (0 0, 1 1)", True), + ("LINESTRING (0 0, 1 1, 1 0, 0 1)", True), + ( + "MULTILINESTRING ((0 0, 1 1), (0 0, 1 1, 1 0, 0 1))", + True, + ), + ("LINESTRING EMPTY", True), + # Invalid LineStrings + ("LINESTRING (0 0, 0 0)", False), # Degenerate - both points identical + ("LINESTRING (0 0, 0 0, 0 0)", False), # All points identical + # Invalid MultiLineStrings + ("MULTILINESTRING ((0 0, 0 0), (1 1, 2 2))", False), # Degenerate component + ( + "MULTILINESTRING ((0 0, 0 0), (1 1, 1 1))", + False, + ), # Multiple degenerate components + ("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))", True), + ("POLYGON EMPTY", True), + ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))", True), + # Invalid Polygons + # Self-intersecting polygon (bowtie) + ("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))", False), + # Inner ring shares an edge with the outer ring + ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (0 0, 0 1, 1 1, 1 0, 0 0))", False), + # Self-intersecting polygon (figure-8) + ("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))", False), + # Inner ring touches the outer ring at a point + ( + "POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 10, 1 9, 2 9, 2 10, 1 10))", + False, + ), + # Overlapping polygons in a multipolygon + ( + "MULTIPOLYGON (((0 0, 2 0, 2 2, 0 2, 0 0)), ((1 1, 3 1, 3 3, 1 3, 1 1)))", + False, + ), + ( + "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))", + True, + ), + # Geometry collection with an invalid polygon + ("GEOMETRYCOLLECTION (POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0)))", False), + ("GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)))", True), + ], +) +def test_st_isvalid(eng, geom, expected): + eng = eng.create_or_skip() + eng.assert_query_result(f"SELECT ST_IsValid({geom_or_null(geom)})", expected) + + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( ("geom", "expected"),