From d822ba8f2bbde804b3a541e1a49d383d4539e421 Mon Sep 17 00:00:00 2001
From: Uros Bojanic
Date: Wed, 29 Oct 2025 18:47:16 +0100
Subject: [PATCH 1/3] Initial commit
---
.../catalyst/expressions/ExpressionInfo.java | 2 +-
.../spark/sql/catalyst/util/Geography.java | 5 +-
.../spark/sql/catalyst/util/Geometry.java | 5 +-
.../spark/sql/catalyst/util/STUtils.java | 70 +++++++
.../catalyst/analysis/FunctionRegistry.scala | 5 +
.../sql/catalyst/expressions/literals.scala | 2 +
.../expressions/st/stExpressions.scala | 174 ++++++++++++++++++
.../util/GeographyExecutionSuite.java | 26 +--
.../catalyst/util/GeometryExecutionSuite.java | 26 +--
.../spark/sql/catalyst/util/StUtilsSuite.java | 93 ++++++++++
.../nonansi/st-functions.sql.out | 13 ++
.../analyzer-results/st-functions.sql.out | 13 ++
.../sql-tests/inputs/nonansi/st-functions.sql | 1 +
.../sql-tests/inputs/st-functions.sql | 5 +
.../results/nonansi/st-functions.sql.out | 15 ++
.../sql-tests/results/st-functions.sql.out | 15 ++
.../apache/spark/sql/STExpressionsSuite.scala | 51 +++++
17 files changed, 494 insertions(+), 27 deletions(-)
create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
create mode 100755 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
create mode 100644 sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql
create mode 100644 sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
create mode 100644 sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
create mode 100644 sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
index 310d18ddb348..dd56c650c073 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
@@ -48,7 +48,7 @@ public class ExpressionInfo {
"collection_funcs", "predicate_funcs", "conditional_funcs", "conversion_funcs",
"csv_funcs", "datetime_funcs", "generator_funcs", "hash_funcs", "json_funcs",
"lambda_funcs", "map_funcs", "math_funcs", "misc_funcs", "string_funcs", "struct_funcs",
- "window_funcs", "xml_funcs", "table_funcs", "url_funcs", "variant_funcs"));
+ "window_funcs", "xml_funcs", "table_funcs", "url_funcs", "variant_funcs", "st_funcs"));
private static final Set validSources =
new HashSet<>(Arrays.asList("built-in", "hive", "python_udf", "scala_udf", "sql_udf",
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
index f7b0df8990d3..c46c2368832f 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
@@ -77,7 +77,10 @@ public Geography copy() {
// Returns a Geography object with the specified SRID value by parsing the input WKB.
public static Geography fromWkb(byte[] wkb, int srid) {
- throw new UnsupportedOperationException("Geography WKB parsing is not yet supported.");
+ byte[] bytes = new byte[HEADER_SIZE + wkb.length];
+ ByteBuffer.wrap(bytes).order(DEFAULT_ENDIANNESS).putInt(srid);
+ System.arraycopy(wkb, 0, bytes, WKB_OFFSET, wkb.length);
+ return fromBytes(bytes);
}
// Overload for the WKB reader where we use the default SRID for Geography.
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
index 81cdaeb97ce2..c4b6e5d0e4bd 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
@@ -77,7 +77,10 @@ public Geometry copy() {
// Returns a Geometry object with the specified SRID value by parsing the input WKB.
public static Geometry fromWkb(byte[] wkb, int srid) {
- throw new UnsupportedOperationException("Geometry WKB parsing is not yet supported.");
+ byte[] bytes = new byte[HEADER_SIZE + wkb.length];
+ ByteBuffer.wrap(bytes).order(DEFAULT_ENDIANNESS).putInt(srid);
+ System.arraycopy(wkb, 0, bytes, WKB_OFFSET, wkb.length);
+ return fromBytes(bytes);
}
// Overload for the WKB reader where we use the default SRID for Geometry.
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
new file mode 100644
index 000000000000..641382a0f959
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.util;
+
+import org.apache.spark.unsafe.types.GeographyVal;
+import org.apache.spark.unsafe.types.GeometryVal;
+
+// This class defines static methods that used to implement ST expressions using `StaticInvoke`.
+public final class STUtils {
+
+ /** Conversion methods from physical values to Geography/Geometry objects. */
+
+ // Converts a GEOGRAPHY from its physical value to the corresponding `Geography` object
+ static Geography fromPhysVal(GeographyVal value) {
+ return Geography.fromBytes(value.getBytes());
+ }
+
+ // Converts a GEOMETRY from its physical value to the corresponding `Geometry` object
+ static Geometry fromPhysVal(GeometryVal value) {
+ return Geometry.fromBytes(value.getBytes());
+ }
+
+ /** Conversion methods from Geography/Geometry objects to physical values. */
+
+ // Converts a `Geography` object to the corresponding GEOGRAPHY physical value.
+ static GeographyVal toPhysVal(Geography g) {
+ return g.getValue();
+ }
+
+ // Converts a `Geometry` object to the corresponding GEOMETRY physical value.
+ static GeometryVal toPhysVal(Geometry g) {
+ return g.getValue();
+ }
+
+ /** Methods for implementing ST expressions. */
+
+ // ST_AsBinary
+ public static byte[] stAsBinary(GeographyVal geo) {
+ return fromPhysVal(geo).toWkb();
+ }
+
+ public static byte[] stAsBinary(GeometryVal geo) {
+ return fromPhysVal(geo).toWkb();
+ }
+
+ // ST_GeogFromWKB
+ public static GeographyVal stGeogFromWKB(byte[] wkb) {
+ return toPhysVal(Geography.fromWkb(wkb));
+ }
+
+ // ST_GeomFromWKB
+ public static GeometryVal stGeomFromWKB(byte[] wkb) {
+ return toPhysVal(Geometry.fromWkb(wkb));
+ }
+
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 0a596a8bd63e..97f8cbc23b7a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -873,6 +873,11 @@ object FunctionRegistry {
expression[SchemaOfVariantAgg]("schema_of_variant_agg"),
expression[ToVariantObject]("to_variant_object"),
+ // Spatial
+ expression[ST_AsBinary]("st_asbinary"),
+ expression[ST_GeogFromWKB]("st_geogfromwkb"),
+ expression[ST_GeomFromWKB]("st_geomfromwkb"),
+
// cast
expression[Cast]("cast"),
// Cast aliases (SPARK-16730)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index c799415dfc70..710bd671b29e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -252,6 +252,8 @@ object Literal {
case PhysicalNullType => true
case PhysicalShortType => v.isInstanceOf[Short]
case _: PhysicalStringType => v.isInstanceOf[UTF8String]
+ case _: PhysicalGeographyType => v.isInstanceOf[GeographyVal]
+ case _: PhysicalGeometryType => v.isInstanceOf[GeometryVal]
case PhysicalVariantType => v.isInstanceOf[VariantVal]
case st: PhysicalStructType =>
v.isInstanceOf[InternalRow] && {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
new file mode 100755
index 000000000000..a2493e154ff3
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.sql.catalyst.expressions.objects._
+import org.apache.spark.sql.catalyst.trees._
+import org.apache.spark.sql.catalyst.util.{Geography, Geometry, STUtils}
+import org.apache.spark.sql.types._
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// This file defines expressions for geospatial operations.
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// Useful constants for ST expressions.
+private[sql] object ExpressionDefaults {
+ val DEFAULT_GEOGRAPHY_SRID: Int = Geography.DEFAULT_SRID
+ val DEFAULT_GEOMETRY_SRID: Int = Geometry.DEFAULT_SRID
+}
+
+/** ST writer expressions. */
+
+/**
+ * Returns the input GEOGRAPHY or GEOMETRY value in WKB format.
+ * See https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
+ * for more details on the WKB format.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(geo) - Returns the geospatial value (value of type GEOGRAPHY or GEOMETRY) "
+ + "in WKB format.",
+ arguments = """
+ Arguments:
+ * geo - A geospatial value, either a GEOGRAPHY or a GEOMETRY.
+ """,
+ examples = """
+ Examples:
+ > SELECT hex(_FUNC_(st_geogfromwkb(X'0101000000000000000000f03f0000000000000040')));
+ 0101000000000000000000f03f0000000000000040
+ > SELECT hex(_FUNC_(st_geomfromwkb(X'0101000000000000000000f03f0000000000000040')));
+ 0101000000000000000000f03f0000000000000040
+ """,
+ since = "4.1.0",
+ group = "st_funcs"
+)
+case class ST_AsBinary(geo: Expression)
+ extends RuntimeReplaceable
+ with ImplicitCastInputTypes
+ with UnaryLike[Expression] {
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(
+ TypeCollection(GeographyType, GeometryType)
+ )
+
+ override lazy val replacement: Expression = StaticInvoke(
+ classOf[STUtils],
+ BinaryType,
+ "stAsBinary",
+ Seq(geo),
+ returnNullable = false
+ )
+
+ override def prettyName: String = "st_asbinary"
+
+ override def child: Expression = geo
+
+ override protected def withNewChildInternal(newChild: Expression): ST_AsBinary =
+ copy(geo = newChild)
+}
+
+/** ST reader expressions. */
+
+/**
+ * Parses the WKB description of a geography and returns the corresponding GEOGRAPHY value. The SRID
+ * value of the returned GEOGRAPHY value is 4326.
+ * See https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
+ * for more details on the WKB format.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(wkb) - Parses the WKB description of a geography and returns the corresponding "
+ + "GEOGRAPHY value.",
+ arguments = """
+ Arguments:
+ * wkb - A BINARY value in WKB format, representing a GEOGRAPHY value.
+ """,
+ examples = """
+ Examples:
+ > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000f03f0000000000000040')));
+ 0101000000000000000000f03f0000000000000040
+ """,
+ since = "4.1.0",
+ group = "st_funcs"
+)
+case class ST_GeogFromWKB(wkb: Expression)
+ extends RuntimeReplaceable
+ with ImplicitCastInputTypes
+ with UnaryLike[Expression] {
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
+
+ override lazy val replacement: Expression = StaticInvoke(
+ classOf[STUtils],
+ GeographyType(ExpressionDefaults.DEFAULT_GEOGRAPHY_SRID),
+ "stGeogFromWKB",
+ Seq(wkb),
+ returnNullable = false
+ )
+
+ override def prettyName: String = "st_geogfromwkb"
+
+ override def child: Expression = wkb
+
+ override protected def withNewChildInternal(newChild: Expression): ST_GeogFromWKB =
+ copy(wkb = newChild)
+}
+
+/**
+ * Parses the WKB description of a geometry and returns the corresponding GEOMETRY value. The SRID
+ * value of the returned GEOMETRY value is 0.
+ * See https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
+ * for more details on the WKB format.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(wkb) - Parses the WKB description of a geometry and returns the corresponding "
+ + "GEOMETRY value.",
+ arguments = """
+ Arguments:
+ * wkb - A BINARY value in WKB format, representing a GEOMETRY value.
+ """,
+ examples = """
+ Examples:
+ > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000f03f0000000000000040')));
+ 0101000000000000000000f03f0000000000000040
+ """,
+ since = "4.1.0",
+ group = "st_funcs"
+)
+case class ST_GeomFromWKB(wkb: Expression)
+ extends RuntimeReplaceable
+ with ImplicitCastInputTypes
+ with UnaryLike[Expression] {
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
+
+ override lazy val replacement: Expression = StaticInvoke(
+ classOf[STUtils],
+ GeometryType(ExpressionDefaults.DEFAULT_GEOMETRY_SRID),
+ "stGeomFromWKB",
+ Seq(wkb),
+ returnNullable = false
+ )
+
+ override def prettyName: String = "st_geomfromwkb"
+
+ override def child: Expression = wkb
+
+ override protected def withNewChildInternal(newChild: Expression): ST_GeomFromWKB =
+ copy(wkb = newChild)
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
index de1f4d916d2e..f7a0a1929bc1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
@@ -83,23 +83,25 @@ void testDefaultSrid() {
/** Tests for Geography WKB parsing. */
@Test
- void testFromWkbWithSridUnsupported() {
+ void testFromWkbWithSridRudimentary() {
byte[] wkb = new byte[]{1, 2, 3};
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- () -> Geography.fromWkb(wkb, 0)
- );
- assertEquals("Geography WKB parsing is not yet supported.", exception.getMessage());
+ // Note: This is a rudimentary WKB handling test; actual WKB parsing is not yet implemented.
+ // Once we implement the appropriate parsing logic, this test should be updated accordingly.
+ Geography geography = Geography.fromWkb(wkb, 4326);
+ assertNotNull(geography);
+ assertArrayEquals(wkb, geography.toWkb());
+ assertEquals(4326, geography.srid());
}
@Test
- void testFromWkbNoSridUnsupported() {
+ void testFromWkbNoSridRudimentary() {
byte[] wkb = new byte[]{1, 2, 3};
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- () -> Geography.fromWkb(wkb)
- );
- assertEquals("Geography WKB parsing is not yet supported.", exception.getMessage());
+ // Note: This is a rudimentary WKB handling test; actual WKB parsing is not yet implemented.
+ // Once we implement the appropriate parsing logic, this test should be updated accordingly.
+ Geography geography = Geography.fromWkb(wkb);
+ assertNotNull(geography);
+ assertArrayEquals(wkb, geography.toWkb());
+ assertEquals(4326, geography.srid());
}
/** Tests for Geography EWKB parsing. */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
index 17950f9cad0d..be43596b7f5a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
@@ -83,23 +83,25 @@ void testDefaultSrid() {
/** Tests for Geometry WKB parsing. */
@Test
- void testFromWkbWithSridUnsupported() {
+ void testFromWkbWithSridRudimentary() {
byte[] wkb = new byte[]{1, 2, 3};
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- () -> Geometry.fromWkb(wkb, 0)
- );
- assertEquals("Geometry WKB parsing is not yet supported.", exception.getMessage());
+ // Note: This is a rudimentary WKB handling test; actual WKB parsing is not yet implemented.
+ // Once we implement the appropriate parsing logic, this test should be updated accordingly.
+ Geometry geometry = Geometry.fromWkb(wkb, 4326);
+ assertNotNull(geometry);
+ assertArrayEquals(wkb, geometry.toWkb());
+ assertEquals(4326, geometry.srid());
}
@Test
- void testFromWkbNoSridUnsupported() {
+ void testFromWkbNoSridRudimentary() {
byte[] wkb = new byte[]{1, 2, 3};
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- () -> Geometry.fromWkb(wkb)
- );
- assertEquals("Geometry WKB parsing is not yet supported.", exception.getMessage());
+ // Note: This is a rudimentary WKB handling test; actual WKB parsing is not yet implemented.
+ // Once we implement the appropriate parsing logic, this test should be updated accordingly.
+ Geometry geometry = Geometry.fromWkb(wkb);
+ assertNotNull(geometry);
+ assertArrayEquals(wkb, geometry.toWkb());
+ assertEquals(0, geometry.srid());
}
/** Tests for Geometry EWKB parsing. */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
new file mode 100644
index 000000000000..425abac2efed
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util;
+
+import org.apache.spark.unsafe.types.GeographyVal;
+import org.apache.spark.unsafe.types.GeometryVal;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+
+/**
+ * Test suite for the ST expression utility class.
+ */
+class STUtilsSuite {
+
+ /** Common test data used across multiple tests below. */
+
+ private final byte[] testWkb = new byte[] {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, (byte)0xF0, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40};
+
+ // A sample Geography byte array for testing purposes, representing a POINT(1 2) with SRID 4326.
+ private final byte[] testGeographySrid = new byte[] {(byte)0xE6, 0x10, 0x00, 0x00};
+ private final byte[] testGeographyBytes;
+
+ // A sample Geometry byte array for testing purposes, representing a POINT(1 2) with SRID 0.
+ private final byte[] testGeometrySrid = new byte[] {0x00, 0x00, 0x00, 0x00};
+ private final byte[] testGeometryBytes;
+
+ {
+ int sridLen = testGeographySrid.length;
+ int wkbLen = testWkb.length;
+ // Initialize GEOGRAPHY.
+ testGeographyBytes = new byte[sridLen + wkbLen];
+ System.arraycopy(testGeographySrid, 0, testGeographyBytes, 0, sridLen);
+ System.arraycopy(testWkb, 0, testGeographyBytes, sridLen, wkbLen);
+ // Initialize GEOMETRY.
+ testGeometryBytes = new byte[sridLen + wkbLen];
+ System.arraycopy(testGeometrySrid, 0, testGeometryBytes, 0, sridLen);
+ System.arraycopy(testWkb, 0, testGeometryBytes, sridLen, wkbLen);
+ }
+
+ /** Tests for ST expression utility methods. */
+
+ // ST_AsBinary
+ @Test
+ void testStAsBinaryGeography() {
+ GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
+ byte[] geographyWkb = STUtils.stAsBinary(geographyVal);
+ assertNotNull(geographyWkb);
+ assertArrayEquals(testWkb, geographyWkb);
+ }
+
+ @Test
+ void testStAsBinaryGeometry() {
+ GeometryVal geometryVal = GeometryVal.fromBytes(testGeometryBytes);
+ byte[] geometryWkb = STUtils.stAsBinary(geometryVal);
+ assertNotNull(geometryWkb);
+ assertArrayEquals(testWkb, geometryWkb);
+ }
+
+ // ST_GeogFromWKB
+ @Test
+ void testStGeogFromWKB() {
+ GeographyVal geographyVal = STUtils.stGeogFromWKB(testWkb);
+ assertNotNull(geographyVal);
+ assertArrayEquals(testGeographyBytes, geographyVal.getBytes());
+ }
+
+ // ST_GeomFromWKB
+ @Test
+ void testStGeomFromWKB() {
+ GeometryVal geometryVal = STUtils.stGeomFromWKB(testWkb);
+ assertNotNull(geometryVal);
+ assertArrayEquals(testGeometryBytes, geometryVal.getBytes());
+ }
+
+}
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
new file mode 100644
index 000000000000..465f27b3c06f
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
@@ -0,0 +1,13 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
+-- !query analysis
+Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040))) AS result#x]
++- OneRowRelation
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
+-- !query analysis
+Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040))) AS result#x]
++- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
new file mode 100644
index 000000000000..465f27b3c06f
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -0,0 +1,13 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
+-- !query analysis
+Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040))) AS result#x]
++- OneRowRelation
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
+-- !query analysis
+Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040))) AS result#x]
++- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql
new file mode 100644
index 000000000000..720ce3767dc4
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql
@@ -0,0 +1 @@
+--IMPORT st-functions.sql
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
new file mode 100644
index 000000000000..02bb526bee25
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -0,0 +1,5 @@
+---- ST reader/writer expressions
+
+-- WKB (Well-Known Binary) round-trip tests for GEOGRAPHY and GEOMETRY types.
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result;
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result;
diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
new file mode 100644
index 000000000000..b77c11f0dd1d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
@@ -0,0 +1,15 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
+-- !query schema
+struct
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
+-- !query schema
+struct
+-- !query output
+0101000000000000000000F03F0000000000000040
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
new file mode 100644
index 000000000000..b77c11f0dd1d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -0,0 +1,15 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
+-- !query schema
+struct
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result
+-- !query schema
+struct
+-- !query output
+0101000000000000000000F03F0000000000000040
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
new file mode 100644
index 000000000000..376466fe71e3
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types._
+
+class STExpressionsSuite
+ extends QueryTest
+ with SharedSparkSession
+ with ExpressionEvalHelper {
+
+ // Private common constants used across several tests.
+ private val defaultGeographyType: DataType =
+ GeographyType(ExpressionDefaults.DEFAULT_GEOGRAPHY_SRID)
+ private val defaultGeometryType: DataType =
+ GeometryType(ExpressionDefaults.DEFAULT_GEOMETRY_SRID)
+
+ /** ST reader/writer expressions. */
+
+ test("ST_AsBinary") {
+ // Test data: WKB representation of POINT(1 2).
+ val wkb = Hex.unhex("0101000000000000000000f03f0000000000000040".getBytes())
+ val wkbLiteral = Literal.create(wkb, BinaryType)
+ // ST_GeogFromWKB and ST_AsBinary.
+ val geographyExpression = ST_GeogFromWKB(wkbLiteral)
+ assert(geographyExpression.dataType.sameType(defaultGeographyType))
+ checkEvaluation(ST_AsBinary(geographyExpression), wkb)
+ // ST_GeomFromWKB and ST_AsBinary.
+ val geometryExpression = ST_GeomFromWKB(wkbLiteral)
+ assert(geometryExpression.dataType.sameType(defaultGeometryType))
+ checkEvaluation(ST_AsBinary(geometryExpression), wkb)
+ }
+
+}
From b4ce4bfaf1a3d700635f6c529de6e3dc28c3a2ad Mon Sep 17 00:00:00 2001
From: Uros Bojanic
Date: Wed, 29 Oct 2025 23:04:41 +0100
Subject: [PATCH 2/3] Fix failing tests
---
.../catalyst/expressions/st/stExpressions.scala | 16 ++++++++--------
.../sql-functions/sql-expression-schema.md | 3 +++
.../apache/spark/sql/STExpressionsSuite.scala | 2 +-
.../sql/expressions/ExpressionInfoSuite.scala | 2 +-
4 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
index a2493e154ff3..12dac4355741 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
@@ -50,10 +50,10 @@ private[sql] object ExpressionDefaults {
""",
examples = """
Examples:
- > SELECT hex(_FUNC_(st_geogfromwkb(X'0101000000000000000000f03f0000000000000040')));
- 0101000000000000000000f03f0000000000000040
- > SELECT hex(_FUNC_(st_geomfromwkb(X'0101000000000000000000f03f0000000000000040')));
- 0101000000000000000000f03f0000000000000040
+ > SELECT hex(_FUNC_(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')));
+ 0101000000000000000000F03F0000000000000040
+ > SELECT hex(_FUNC_(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040')));
+ 0101000000000000000000F03F0000000000000040
""",
since = "4.1.0",
group = "st_funcs"
@@ -100,8 +100,8 @@ case class ST_AsBinary(geo: Expression)
""",
examples = """
Examples:
- > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000f03f0000000000000040')));
- 0101000000000000000000f03f0000000000000040
+ > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000F03F0000000000000040')));
+ 0101000000000000000000F03F0000000000000040
""",
since = "4.1.0",
group = "st_funcs"
@@ -144,8 +144,8 @@ case class ST_GeogFromWKB(wkb: Expression)
""",
examples = """
Examples:
- > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000f03f0000000000000040')));
- 0101000000000000000000f03f0000000000000040
+ > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000F03F0000000000000040')));
+ 0101000000000000000000F03F0000000000000040
""",
since = "4.1.0",
group = "st_funcs"
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index f192a020f576..56146237a98b 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -287,6 +287,9 @@
| org.apache.spark.sql.catalyst.expressions.Rint | rint | SELECT rint(12.3456) | struct |
| org.apache.spark.sql.catalyst.expressions.Round | round | SELECT round(2.5, 0) | struct |
| org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct |
+| org.apache.spark.sql.catalyst.expressions.ST_AsBinary | st_asbinary | SELECT hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct |
+| org.apache.spark.sql.catalyst.expressions.ST_GeogFromWKB | st_geogfromwkb | SELECT hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct |
+| org.apache.spark.sql.catalyst.expressions.ST_GeomFromWKB | st_geomfromwkb | SELECT hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct |
| org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct |
| org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct |
| org.apache.spark.sql.catalyst.expressions.SchemaOfXml | schema_of_xml | SELECT schema_of_xml('1
') | struct1
):string> |
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
index 376466fe71e3..46c59bbd6d0c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
@@ -36,7 +36,7 @@ class STExpressionsSuite
test("ST_AsBinary") {
// Test data: WKB representation of POINT(1 2).
- val wkb = Hex.unhex("0101000000000000000000f03f0000000000000040".getBytes())
+ val wkb = Hex.unhex("0101000000000000000000F03F0000000000000040".getBytes())
val wkbLiteral = Literal.create(wkb, BinaryType)
// ST_GeogFromWKB and ST_AsBinary.
val geographyExpression = ST_GeogFromWKB(wkbLiteral)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
index e90907b904bd..33f128409f7e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
@@ -60,7 +60,7 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
"predicate_funcs", "conditional_funcs", "conversion_funcs", "csv_funcs", "datetime_funcs",
"generator_funcs", "hash_funcs", "json_funcs", "lambda_funcs", "map_funcs", "math_funcs",
"misc_funcs", "string_funcs", "struct_funcs", "window_funcs", "xml_funcs", "table_funcs",
- "url_funcs", "variant_funcs").sorted
+ "url_funcs", "variant_funcs", "st_funcs").sorted
val invalidGroupName = "invalid_group_funcs"
checkError(
exception = intercept[SparkIllegalArgumentException] {
From 130d64ce9cfa7f7318581f85795d6b3e8a51f65e Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Thu, 30 Oct 2025 10:21:03 +0800
Subject: [PATCH 3/3] Apply suggestions from code review
---
.../spark/sql/catalyst/expressions/st/stExpressions.scala | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
index 12dac4355741..12f3d13746d5 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
@@ -64,8 +64,8 @@ case class ST_AsBinary(geo: Expression)
with UnaryLike[Expression] {
override def inputTypes: Seq[AbstractDataType] = Seq(
- TypeCollection(GeographyType, GeometryType)
- )
+ TypeCollection(GeographyType, GeometryType)
+ )
override lazy val replacement: Expression = StaticInvoke(
classOf[STUtils],
@@ -80,7 +80,7 @@ case class ST_AsBinary(geo: Expression)
override def child: Expression = geo
override protected def withNewChildInternal(newChild: Expression): ST_AsBinary =
- copy(geo = newChild)
+ copy(geo = newChild)
}
/** ST reader expressions. */