diff --git a/common/src/main/java/org/apache/sedona/common/geography/Functions.java b/common/src/main/java/org/apache/sedona/common/geography/Functions.java index 125552ab55..b3d1cc2a43 100644 --- a/common/src/main/java/org/apache/sedona/common/geography/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/geography/Functions.java @@ -274,6 +274,13 @@ public static boolean contains(Geography g1, Geography g2) { return pred.S2_contains(toShapeIndex(g1), toShapeIndex(g2), s2Options()); } + /** Spherical equality test via S2 boolean operations; false if either input is null. */ + public static boolean equals(Geography g1, Geography g2) { + if (g1 == null || g2 == null) return false; + Predicates pred = new Predicates(); + return pred.S2_equals(toShapeIndex(g1), toShapeIndex(g2), s2Options()); + } + /** Return EWKT for geography object */ public static String asEWKT(Geography geography) { return geography.toEWKT(); diff --git a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java index 88f41cef29..52ca928925 100644 --- a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java +++ b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java @@ -419,6 +419,35 @@ public void contains_pointOutsidePolygon() throws ParseException { assertFalse(Functions.contains(g1, g2)); } + @Test + public void equals_samePoint() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POINT (1 2)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (1 2)", 4326); + assertTrue(Functions.equals(g1, g2)); + } + + @Test + public void equals_differentPoints() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POINT (1 2)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (3 4)", 4326); + assertFalse(Functions.equals(g1, g2)); + } + + @Test + public void equals_samePolygon() throws ParseException { + Geography g1 = 
Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + Geography g2 = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + assertTrue(Functions.equals(g1, g2)); + } + + @Test + public void equals_nullHandling() throws ParseException { + Geography g = Constructors.geogFromWKT("POINT (1 1)", 4326); + assertFalse(Functions.equals(g, null)); + assertFalse(Functions.equals(null, g)); + assertFalse(Functions.equals(null, null)); + } + @Test public void contains_nullHandling() throws ParseException { Geography g1 = Constructors.geogFromWKT("POINT (1 1)", 4326); diff --git a/docs/api/sql/geography/Geography-Functions.md b/docs/api/sql/geography/Geography-Functions.md index 5daaf5aaf1..32b4faf7f8 100644 --- a/docs/api/sql/geography/Geography-Functions.md +++ b/docs/api/sql/geography/Geography-Functions.md @@ -52,3 +52,4 @@ These functions operate on geography type objects. | [ST_NumGeometries](Geography-Functions/ST_NumGeometries.md) | Integer | Return the number of sub-geometries in a geography (1 for single geometries). | v1.9.1 | | [ST_Distance](Geography-Functions/ST_Distance.md) | Double | Return the minimum geodesic distance between two geographies in meters. | v1.9.0 | | [ST_Contains](Geography-Functions/ST_Contains.md) | Boolean | Test whether geography A fully contains geography B. | v1.9.0 | +| [ST_Equals](Geography-Functions/ST_Equals.md) | Boolean | Test whether two geographies are spatially equal. | v1.9.1 | diff --git a/docs/api/sql/geography/Geography-Functions/ST_Equals.md b/docs/api/sql/geography/Geography-Functions/ST_Equals.md new file mode 100644 index 0000000000..ac01dce9a1 --- /dev/null +++ b/docs/api/sql/geography/Geography-Functions/ST_Equals.md @@ -0,0 +1,45 @@ + + +# ST_Equals + +Introduction: Tests whether two geography objects are spatially equal using S2 spherical boolean operations. Returns true if A and B represent the same spatial region. 
+ +Format: + +`ST_Equals (A: Geography, B: Geography)` + +Return type: `Boolean` + +Since: `v1.9.1` + +SQL Example + +```sql +SELECT ST_Equals( + ST_GeogFromWKT('POINT (1 1)'), + ST_GeogFromWKT('POINT (1 1)') +); +``` + +Output: + +``` +true +``` diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala index 6affd348b3..de94ac5c23 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala @@ -241,12 +241,9 @@ private[apache] case class ST_RelateMatch(inputExpressions: Seq[Expression]) * @param inputExpressions */ private[apache] case class ST_Equals(inputExpressions: Seq[Expression]) - extends ST_Predicate - with CodegenFallback { - - override def evalGeom(leftGeometry: Geometry, rightGeometry: Geometry): Boolean = { - Predicates.equals(leftGeometry, rightGeometry) - } + extends InferredExpression( + inferrableFunction2(Predicates.equals), + inferrableFunction2(org.apache.sedona.common.geography.Functions.equals)) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala index 306dd18b94..8acae32e2b 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala @@ -57,12 +57,14 @@ case class JoinQueryDetection( */ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { - // ST_Contains is the only spatial predicate currently wired for Geography (via - // 
InferredExpression dual dispatch). For broadcast joins we route GeographyUDT inputs through - // a dedicated index/refine path (see SpatialIndexExec.geographyShape and - // BroadcastIndexJoinExec.geographyShape); for the partition/range path we still fall back to - // row-by-row evaluation. Other ST_Predicates reject Geography inputs at analysis time, so no - // guard is needed there. + // Geography spatial predicates wired via InferredExpression dual dispatch: + // * ST_Contains — broadcast joins route GeographyUDT inputs through a dedicated index/refine + // path (see SpatialIndexExec.geographyShape / BroadcastIndexJoinExec.geographyShape). The + // partition/range path still falls back to row-by-row evaluation. + // * ST_Equals — no broadcast index path yet (the Geography refiner is ST_Contains-specific), + // so we gate Geography inputs at the matcher and let Spark evaluate the predicate + // row-by-row. + // Other ST_Predicates reject Geography inputs at analysis time, so no guard is needed there. private def isGeographyInput(shape: Expression): Boolean = shape.dataType.isInstanceOf[GeographyUDT] @@ -132,16 +134,6 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { SpatialPredicate.TOUCHES, false, extraCondition)) - case ST_Equals(Seq(leftShape, rightShape)) => - Some( - JoinQueryDetection( - left, - right, - leftShape, - rightShape, - SpatialPredicate.EQUALS, - false, - extraCondition)) case ST_Crosses(Seq(leftShape, rightShape)) => Some( JoinQueryDetection( @@ -211,11 +203,14 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { val queryDetection: Option[JoinQueryDetection] = condition.flatMap { case joinConditionMatcher(predicate, extraCondition) => predicate match { - // ST_Contains is an InferredExpression (not ST_Predicate) so it can't sit inside - // getJoinDetection. 
When either operand is GeographyUDT we still detect the join - // here and set `geographyShape = true`; planBroadcastJoin will route the work to - // the Geography-aware index/refine path. Non-broadcast plans bail out in `apply` - // below and fall back to row-by-row evaluation. + // ST_Contains / ST_Equals are InferredExpression (not ST_Predicate) so they can't + // sit inside getJoinDetection; they're also the only predicates currently accepting + // Geography inputs. + // + // ST_Contains: when either operand is GeographyUDT we still detect the join here and + // set `geographyShape = true`; planBroadcastJoin will route the work to the + // Geography-aware index/refine path. Non-broadcast plans bail out in `apply` below + // and fall back to row-by-row evaluation. case ST_Contains(Seq(leftShape, rightShape)) => val geographyShape = isGeographyInput(leftShape) || isGeographyInput(rightShape) @@ -229,6 +224,20 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { isGeography = false, extraCondition, geographyShape = geographyShape)) + // ST_Equals on Geography has no broadcast index path yet (the Geography refiner is + // ST_Contains-specific), so gate Geography inputs and let them fall back to + // row-by-row evaluation. 
+ case ST_Equals(Seq(leftShape, rightShape)) + if !isGeographyInput(leftShape) && !isGeographyInput(rightShape) => + Some( + JoinQueryDetection( + left, + right, + leftShape, + rightShape, + SpatialPredicate.EQUALS, + isGeography = false, + extraCondition)) case pred: ST_Predicate => getJoinDetection(left, right, pred, extraCondition) case pred: RS_Predicate => diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala index b1cae8acb5..790b5e206d 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala @@ -213,6 +213,54 @@ class GeographyFunctionTest extends TestBaseScala { .first() assertTrue(!row.getBoolean(0)) } + + it("ST_Equals same point") { + val row = sparkSession + .sql(""" + SELECT ST_Equals( + ST_GeogFromWKT('POINT (1 2)', 4326), + ST_GeogFromWKT('POINT (1 2)', 4326) + ) AS result + """) + .first() + assertTrue(row.getBoolean(0)) + } + + it("ST_Equals different points") { + val row = sparkSession + .sql(""" + SELECT ST_Equals( + ST_GeogFromWKT('POINT (1 2)', 4326), + ST_GeogFromWKT('POINT (3 4)', 4326) + ) AS result + """) + .first() + assertTrue(!row.getBoolean(0)) + } + + it("ST_Equals same polygon") { + val row = sparkSession + .sql(""" + SELECT ST_Equals( + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) + ) AS result + """) + .first() + assertTrue(row.getBoolean(0)) + } + + it("ST_Equals different polygons") { + val row = sparkSession + .sql(""" + SELECT ST_Equals( + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326), + ST_GeogFromWKT('POLYGON ((10 10, 11 10, 11 11, 10 11, 10 10))', 4326) + ) AS result + """) + .first() + assertTrue(!row.getBoolean(0)) + } } // ─── Level 4: ST_Buffer 
──────────────────────────────────────────────── @@ -332,6 +380,16 @@ class GeographyFunctionTest extends TestBaseScala { .select(st_predicates.ST_Contains(col("poly"), col("pt")).as("result")) assertTrue(df.first().getBoolean(0)) } + + it("ST_Equals via DataFrame API") { + val df = sparkSession + .sql("SELECT 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))' AS a, 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))' AS b") + .select( + st_constructors.ST_GeogFromWKT(col("a"), lit(4326)).as("a"), + st_constructors.ST_GeogFromWKT(col("b"), lit(4326)).as("b")) + .select(st_predicates.ST_Equals(col("a"), col("b")).as("result")) + assertTrue(df.first().getBoolean(0)) + } } // ─── Serialization round-trip ──────────────────────────────────────────