From d822ba8f2bbde804b3a541e1a49d383d4539e421 Mon Sep 17 00:00:00 2001 From: Uros Bojanic Date: Wed, 29 Oct 2025 18:47:16 +0100 Subject: [PATCH 1/3] Initial commit --- .../catalyst/expressions/ExpressionInfo.java | 2 +- .../spark/sql/catalyst/util/Geography.java | 5 +- .../spark/sql/catalyst/util/Geometry.java | 5 +- .../spark/sql/catalyst/util/STUtils.java | 70 +++++++ .../catalyst/analysis/FunctionRegistry.scala | 5 + .../sql/catalyst/expressions/literals.scala | 2 + .../expressions/st/stExpressions.scala | 174 ++++++++++++++++++ .../util/GeographyExecutionSuite.java | 26 +-- .../catalyst/util/GeometryExecutionSuite.java | 26 +-- .../spark/sql/catalyst/util/StUtilsSuite.java | 93 ++++++++++ .../nonansi/st-functions.sql.out | 13 ++ .../analyzer-results/st-functions.sql.out | 13 ++ .../sql-tests/inputs/nonansi/st-functions.sql | 1 + .../sql-tests/inputs/st-functions.sql | 5 + .../results/nonansi/st-functions.sql.out | 15 ++ .../sql-tests/results/st-functions.sql.out | 15 ++ .../apache/spark/sql/STExpressionsSuite.scala | 51 +++++ 17 files changed, 494 insertions(+), 27 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java create mode 100755 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/st-functions.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/st-functions.sql.out create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java index 310d18ddb348..dd56c650c073 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java @@ -48,7 +48,7 @@ public class ExpressionInfo { "collection_funcs", "predicate_funcs", "conditional_funcs", "conversion_funcs", "csv_funcs", "datetime_funcs", "generator_funcs", "hash_funcs", "json_funcs", "lambda_funcs", "map_funcs", "math_funcs", "misc_funcs", "string_funcs", "struct_funcs", - "window_funcs", "xml_funcs", "table_funcs", "url_funcs", "variant_funcs")); + "window_funcs", "xml_funcs", "table_funcs", "url_funcs", "variant_funcs", "st_funcs")); private static final Set validSources = new HashSet<>(Arrays.asList("built-in", "hive", "python_udf", "scala_udf", "sql_udf", diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java index f7b0df8990d3..c46c2368832f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java @@ -77,7 +77,10 @@ public Geography copy() { // Returns a Geography object with the specified SRID value by parsing the input WKB. public static Geography fromWkb(byte[] wkb, int srid) { - throw new UnsupportedOperationException("Geography WKB parsing is not yet supported."); + byte[] bytes = new byte[HEADER_SIZE + wkb.length]; + ByteBuffer.wrap(bytes).order(DEFAULT_ENDIANNESS).putInt(srid); + System.arraycopy(wkb, 0, bytes, WKB_OFFSET, wkb.length); + return fromBytes(bytes); } // Overload for the WKB reader where we use the default SRID for Geography. diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java index 81cdaeb97ce2..c4b6e5d0e4bd 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java @@ -77,7 +77,10 @@ public Geometry copy() { // Returns a Geometry object with the specified SRID value by parsing the input WKB. public static Geometry fromWkb(byte[] wkb, int srid) { - throw new UnsupportedOperationException("Geometry WKB parsing is not yet supported."); + byte[] bytes = new byte[HEADER_SIZE + wkb.length]; + ByteBuffer.wrap(bytes).order(DEFAULT_ENDIANNESS).putInt(srid); + System.arraycopy(wkb, 0, bytes, WKB_OFFSET, wkb.length); + return fromBytes(bytes); } // Overload for the WKB reader where we use the default SRID for Geometry. diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java new file mode 100644 index 000000000000..641382a0f959 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.util; + +import org.apache.spark.unsafe.types.GeographyVal; +import org.apache.spark.unsafe.types.GeometryVal; + +// This class defines static methods that used to implement ST expressions using `StaticInvoke`. +public final class STUtils { + + /** Conversion methods from physical values to Geography/Geometry objects. */ + + // Converts a GEOGRAPHY from its physical value to the corresponding `Geography` object + static Geography fromPhysVal(GeographyVal value) { + return Geography.fromBytes(value.getBytes()); + } + + // Converts a GEOMETRY from its physical value to the corresponding `Geometry` object + static Geometry fromPhysVal(GeometryVal value) { + return Geometry.fromBytes(value.getBytes()); + } + + /** Conversion methods from Geography/Geometry objects to physical values. */ + + // Converts a `Geography` object to the corresponding GEOGRAPHY physical value. + static GeographyVal toPhysVal(Geography g) { + return g.getValue(); + } + + // Converts a `Geometry` object to the corresponding GEOMETRY physical value. + static GeometryVal toPhysVal(Geometry g) { + return g.getValue(); + } + + /** Methods for implementing ST expressions. */ + + // ST_AsBinary + public static byte[] stAsBinary(GeographyVal geo) { + return fromPhysVal(geo).toWkb(); + } + + public static byte[] stAsBinary(GeometryVal geo) { + return fromPhysVal(geo).toWkb(); + } + + // ST_GeogFromWKB + public static GeographyVal stGeogFromWKB(byte[] wkb) { + return toPhysVal(Geography.fromWkb(wkb)); + } + + // ST_GeomFromWKB + public static GeometryVal stGeomFromWKB(byte[] wkb) { + return toPhysVal(Geometry.fromWkb(wkb)); + } + +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 0a596a8bd63e..97f8cbc23b7a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -873,6 +873,11 @@ object FunctionRegistry { expression[SchemaOfVariantAgg]("schema_of_variant_agg"), expression[ToVariantObject]("to_variant_object"), + // Spatial + expression[ST_AsBinary]("st_asbinary"), + expression[ST_GeogFromWKB]("st_geogfromwkb"), + expression[ST_GeomFromWKB]("st_geomfromwkb"), + // cast expression[Cast]("cast"), // Cast aliases (SPARK-16730) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index c799415dfc70..710bd671b29e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -252,6 +252,8 @@ object Literal { case PhysicalNullType => true case PhysicalShortType => v.isInstanceOf[Short] case _: PhysicalStringType => v.isInstanceOf[UTF8String] + case _: PhysicalGeographyType => v.isInstanceOf[GeographyVal] + case _: PhysicalGeometryType => v.isInstanceOf[GeometryVal] case PhysicalVariantType => v.isInstanceOf[VariantVal] case st: PhysicalStructType => v.isInstanceOf[InternalRow] && { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala new file mode 100755 index 000000000000..a2493e154ff3 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.catalyst.expressions.objects._ +import org.apache.spark.sql.catalyst.trees._ +import org.apache.spark.sql.catalyst.util.{Geography, Geometry, STUtils} +import org.apache.spark.sql.types._ + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// This file defines expressions for geospatial operations. +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +// Useful constants for ST expressions. +private[sql] object ExpressionDefaults { + val DEFAULT_GEOGRAPHY_SRID: Int = Geography.DEFAULT_SRID + val DEFAULT_GEOMETRY_SRID: Int = Geometry.DEFAULT_SRID +} + +/** ST writer expressions. */ + +/** + * Returns the input GEOGRAPHY or GEOMETRY value in WKB format. + * See https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary + * for more details on the WKB format. + */ +@ExpressionDescription( + usage = "_FUNC_(geo) - Returns the geospatial value (value of type GEOGRAPHY or GEOMETRY) " + + "in WKB format.", + arguments = """ + Arguments: + * geo - A geospatial value, either a GEOGRAPHY or a GEOMETRY. + """, + examples = """ + Examples: + > SELECT hex(_FUNC_(st_geogfromwkb(X'0101000000000000000000f03f0000000000000040'))); + 0101000000000000000000f03f0000000000000040 + > SELECT hex(_FUNC_(st_geomfromwkb(X'0101000000000000000000f03f0000000000000040'))); + 0101000000000000000000f03f0000000000000040 + """, + since = "4.1.0", + group = "st_funcs" +) +case class ST_AsBinary(geo: Expression) + extends RuntimeReplaceable + with ImplicitCastInputTypes + with UnaryLike[Expression] { + + override def inputTypes: Seq[AbstractDataType] = Seq( + TypeCollection(GeographyType, GeometryType) + ) + + override lazy val replacement: Expression = StaticInvoke( + classOf[STUtils], + BinaryType, + "stAsBinary", + Seq(geo), + returnNullable = false + ) + + override def prettyName: String = "st_asbinary" + + override def child: Expression = geo + + override protected def withNewChildInternal(newChild: Expression): ST_AsBinary = + copy(geo = newChild) +} + +/** ST reader expressions. */ + +/** + * Parses the WKB description of a geography and returns the corresponding GEOGRAPHY value. The SRID + * value of the returned GEOGRAPHY value is 4326. + * See https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary + * for more details on the WKB format. + */ +@ExpressionDescription( + usage = "_FUNC_(wkb) - Parses the WKB description of a geography and returns the corresponding " + + "GEOGRAPHY value.", + arguments = """ + Arguments: + * wkb - A BINARY value in WKB format, representing a GEOGRAPHY value. + """, + examples = """ + Examples: + > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000f03f0000000000000040'))); + 0101000000000000000000f03f0000000000000040 + """, + since = "4.1.0", + group = "st_funcs" +) +case class ST_GeogFromWKB(wkb: Expression) + extends RuntimeReplaceable + with ImplicitCastInputTypes + with UnaryLike[Expression] { + + override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType) + + override lazy val replacement: Expression = StaticInvoke( + classOf[STUtils], + GeographyType(ExpressionDefaults.DEFAULT_GEOGRAPHY_SRID), + "stGeogFromWKB", + Seq(wkb), + returnNullable = false + ) + + override def prettyName: String = "st_geogfromwkb" + + override def child: Expression = wkb + + override protected def withNewChildInternal(newChild: Expression): ST_GeogFromWKB = + copy(wkb = newChild) +} + +/** + * Parses the WKB description of a geometry and returns the corresponding GEOMETRY value. The SRID + * value of the returned GEOMETRY value is 0. + * See https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary + * for more details on the WKB format. + */ +@ExpressionDescription( + usage = "_FUNC_(wkb) - Parses the WKB description of a geometry and returns the corresponding " + + "GEOMETRY value.", + arguments = """ + Arguments: + * wkb - A BINARY value in WKB format, representing a GEOMETRY value. + """, + examples = """ + Examples: + > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000f03f0000000000000040'))); + 0101000000000000000000f03f0000000000000040 + """, + since = "4.1.0", + group = "st_funcs" +) +case class ST_GeomFromWKB(wkb: Expression) + extends RuntimeReplaceable + with ImplicitCastInputTypes + with UnaryLike[Expression] { + + override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType) + + override lazy val replacement: Expression = StaticInvoke( + classOf[STUtils], + GeometryType(ExpressionDefaults.DEFAULT_GEOMETRY_SRID), + "stGeomFromWKB", + Seq(wkb), + returnNullable = false + ) + + override def prettyName: String = "st_geomfromwkb" + + override def child: Expression = wkb + + override protected def withNewChildInternal(newChild: Expression): ST_GeomFromWKB = + copy(wkb = newChild) +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java index de1f4d916d2e..f7a0a1929bc1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java @@ -83,23 +83,25 @@ void testDefaultSrid() { /** Tests for Geography WKB parsing. */ @Test - void testFromWkbWithSridUnsupported() { + void testFromWkbWithSridRudimentary() { byte[] wkb = new byte[]{1, 2, 3}; - UnsupportedOperationException exception = assertThrows( - UnsupportedOperationException.class, - () -> Geography.fromWkb(wkb, 0) - ); - assertEquals("Geography WKB parsing is not yet supported.", exception.getMessage()); + // Note: This is a rudimentary WKB handling test; actual WKB parsing is not yet implemented. + // Once we implement the appropriate parsing logic, this test should be updated accordingly. + Geography geography = Geography.fromWkb(wkb, 4326); + assertNotNull(geography); + assertArrayEquals(wkb, geography.toWkb()); + assertEquals(4326, geography.srid()); } @Test - void testFromWkbNoSridUnsupported() { + void testFromWkbNoSridRudimentary() { byte[] wkb = new byte[]{1, 2, 3}; - UnsupportedOperationException exception = assertThrows( - UnsupportedOperationException.class, - () -> Geography.fromWkb(wkb) - ); - assertEquals("Geography WKB parsing is not yet supported.", exception.getMessage()); + // Note: This is a rudimentary WKB handling test; actual WKB parsing is not yet implemented. + // Once we implement the appropriate parsing logic, this test should be updated accordingly. + Geography geography = Geography.fromWkb(wkb); + assertNotNull(geography); + assertArrayEquals(wkb, geography.toWkb()); + assertEquals(4326, geography.srid()); } /** Tests for Geography EWKB parsing. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java index 17950f9cad0d..be43596b7f5a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java @@ -83,23 +83,25 @@ void testDefaultSrid() { /** Tests for Geometry WKB parsing. */ @Test - void testFromWkbWithSridUnsupported() { + void testFromWkbWithSridRudimentary() { byte[] wkb = new byte[]{1, 2, 3}; - UnsupportedOperationException exception = assertThrows( - UnsupportedOperationException.class, - () -> Geometry.fromWkb(wkb, 0) - ); - assertEquals("Geometry WKB parsing is not yet supported.", exception.getMessage()); + // Note: This is a rudimentary WKB handling test; actual WKB parsing is not yet implemented. + // Once we implement the appropriate parsing logic, this test should be updated accordingly. + Geometry geometry = Geometry.fromWkb(wkb, 4326); + assertNotNull(geometry); + assertArrayEquals(wkb, geometry.toWkb()); + assertEquals(4326, geometry.srid()); } @Test - void testFromWkbNoSridUnsupported() { + void testFromWkbNoSridRudimentary() { byte[] wkb = new byte[]{1, 2, 3}; - UnsupportedOperationException exception = assertThrows( - UnsupportedOperationException.class, - () -> Geometry.fromWkb(wkb) - ); - assertEquals("Geometry WKB parsing is not yet supported.", exception.getMessage()); + // Note: This is a rudimentary WKB handling test; actual WKB parsing is not yet implemented. + // Once we implement the appropriate parsing logic, this test should be updated accordingly. + Geometry geometry = Geometry.fromWkb(wkb); + assertNotNull(geometry); + assertArrayEquals(wkb, geometry.toWkb()); + assertEquals(0, geometry.srid()); } /** Tests for Geometry EWKB parsing. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java new file mode 100644 index 000000000000..425abac2efed --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util; + +import org.apache.spark.unsafe.types.GeographyVal; +import org.apache.spark.unsafe.types.GeometryVal; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + + +/** + * Test suite for the ST expression utility class. + */ +class STUtilsSuite { + + /** Common test data used across multiple tests below. */ + + private final byte[] testWkb = new byte[] {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, (byte)0xF0, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40}; + + // A sample Geography byte array for testing purposes, representing a POINT(1 2) with SRID 4326. + private final byte[] testGeographySrid = new byte[] {(byte)0xE6, 0x10, 0x00, 0x00}; + private final byte[] testGeographyBytes; + + // A sample Geometry byte array for testing purposes, representing a POINT(1 2) with SRID 0. + private final byte[] testGeometrySrid = new byte[] {0x00, 0x00, 0x00, 0x00}; + private final byte[] testGeometryBytes; + + { + int sridLen = testGeographySrid.length; + int wkbLen = testWkb.length; + // Initialize GEOGRAPHY. + testGeographyBytes = new byte[sridLen + wkbLen]; + System.arraycopy(testGeographySrid, 0, testGeographyBytes, 0, sridLen); + System.arraycopy(testWkb, 0, testGeographyBytes, sridLen, wkbLen); + // Initialize GEOMETRY. + testGeometryBytes = new byte[sridLen + wkbLen]; + System.arraycopy(testGeometrySrid, 0, testGeometryBytes, 0, sridLen); + System.arraycopy(testWkb, 0, testGeometryBytes, sridLen, wkbLen); + } + + /** Tests for ST expression utility methods. */ + + // ST_AsBinary + @Test + void testStAsBinaryGeography() { + GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes); + byte[] geographyWkb = STUtils.stAsBinary(geographyVal); + assertNotNull(geographyWkb); + assertArrayEquals(testWkb, geographyWkb); + } + + @Test + void testStAsBinaryGeometry() { + GeometryVal geometryVal = GeometryVal.fromBytes(testGeometryBytes); + byte[] geometryWkb = STUtils.stAsBinary(geometryVal); + assertNotNull(geometryWkb); + assertArrayEquals(testWkb, geometryWkb); + } + + // ST_GeogFromWKB + @Test + void testStGeogFromWKB() { + GeographyVal geographyVal = STUtils.stGeogFromWKB(testWkb); + assertNotNull(geographyVal); + assertArrayEquals(testGeographyBytes, geographyVal.getBytes()); + } + + // ST_GeomFromWKB + @Test + void testStGeomFromWKB() { + GeometryVal geometryVal = STUtils.stGeomFromWKB(testWkb); + assertNotNull(geometryVal); + assertArrayEquals(testGeometryBytes, geometryVal.getBytes()); + } + +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out new file mode 100644 index 000000000000..465f27b3c06f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out @@ -0,0 +1,13 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result +-- !query analysis +Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040))) AS result#x] ++- OneRowRelation + + +-- !query +SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result +-- !query analysis +Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040))) AS result#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out new file mode 100644 index 000000000000..465f27b3c06f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out @@ -0,0 +1,13 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result +-- !query analysis +Project [hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040))) AS result#x] ++- OneRowRelation + + +-- !query +SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result +-- !query analysis +Project [hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040))) AS result#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql new file mode 100644 index 000000000000..720ce3767dc4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql @@ -0,0 +1 @@ +--IMPORT st-functions.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql new file mode 100644 index 000000000000..02bb526bee25 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql @@ -0,0 +1,5 @@ +---- ST reader/writer expressions + +-- WKB (Well-Known Binary) round-trip tests for GEOGRAPHY and GEOMETRY types. +SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result; +SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result; diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out new file mode 100644 index 000000000000..b77c11f0dd1d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out @@ -0,0 +1,15 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result +-- !query schema +struct +-- !query output +0101000000000000000000F03F0000000000000040 + + +-- !query +SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result +-- !query schema +struct +-- !query output +0101000000000000000000F03F0000000000000040 diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out new file mode 100644 index 000000000000..b77c11f0dd1d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out @@ -0,0 +1,15 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result +-- !query schema +struct +-- !query output +0101000000000000000000F03F0000000000000040 + + +-- !query +SELECT hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))) AS result +-- !query schema +struct +-- !query output +0101000000000000000000F03F0000000000000040 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala new file mode 100644 index 000000000000..376466fe71e3 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ + +class STExpressionsSuite + extends QueryTest + with SharedSparkSession + with ExpressionEvalHelper { + + // Private common constants used across several tests. + private val defaultGeographyType: DataType = + GeographyType(ExpressionDefaults.DEFAULT_GEOGRAPHY_SRID) + private val defaultGeometryType: DataType = + GeometryType(ExpressionDefaults.DEFAULT_GEOMETRY_SRID) + + /** ST reader/writer expressions. */ + + test("ST_AsBinary") { + // Test data: WKB representation of POINT(1 2). + val wkb = Hex.unhex("0101000000000000000000f03f0000000000000040".getBytes()) + val wkbLiteral = Literal.create(wkb, BinaryType) + // ST_GeogFromWKB and ST_AsBinary. + val geographyExpression = ST_GeogFromWKB(wkbLiteral) + assert(geographyExpression.dataType.sameType(defaultGeographyType)) + checkEvaluation(ST_AsBinary(geographyExpression), wkb) + // ST_GeomFromWKB and ST_AsBinary. + val geometryExpression = ST_GeomFromWKB(wkbLiteral) + assert(geometryExpression.dataType.sameType(defaultGeometryType)) + checkEvaluation(ST_AsBinary(geometryExpression), wkb) + } + +} From b4ce4bfaf1a3d700635f6c529de6e3dc28c3a2ad Mon Sep 17 00:00:00 2001 From: Uros Bojanic Date: Wed, 29 Oct 2025 23:04:41 +0100 Subject: [PATCH 2/3] Fix failing tests --- .../catalyst/expressions/st/stExpressions.scala | 16 ++++++++-------- .../sql-functions/sql-expression-schema.md | 3 +++ .../apache/spark/sql/STExpressionsSuite.scala | 2 +- .../sql/expressions/ExpressionInfoSuite.scala | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala index a2493e154ff3..12dac4355741 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala @@ -50,10 +50,10 @@ private[sql] object ExpressionDefaults { """, examples = """ Examples: - > SELECT hex(_FUNC_(st_geogfromwkb(X'0101000000000000000000f03f0000000000000040'))); - 0101000000000000000000f03f0000000000000040 - > SELECT hex(_FUNC_(st_geomfromwkb(X'0101000000000000000000f03f0000000000000040'))); - 0101000000000000000000f03f0000000000000040 + > SELECT hex(_FUNC_(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))); + 0101000000000000000000F03F0000000000000040 + > SELECT hex(_FUNC_(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040'))); + 0101000000000000000000F03F0000000000000040 """, since = "4.1.0", group = "st_funcs" @@ -100,8 +100,8 @@ case class ST_AsBinary(geo: Expression) """, examples = """ Examples: - > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000f03f0000000000000040'))); - 0101000000000000000000f03f0000000000000040 + > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000F03F0000000000000040'))); + 0101000000000000000000F03F0000000000000040 """, since = "4.1.0", group = "st_funcs" @@ -144,8 +144,8 @@ case class ST_GeogFromWKB(wkb: Expression) """, examples = """ Examples: - > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000f03f0000000000000040'))); - 0101000000000000000000f03f0000000000000040 + > SELECT hex(st_asbinary(_FUNC_(X'0101000000000000000000F03F0000000000000040'))); + 0101000000000000000000F03F0000000000000040 """, since = "4.1.0", group = "st_funcs" diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index f192a020f576..56146237a98b 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -287,6 +287,9 @@ | org.apache.spark.sql.catalyst.expressions.Rint | rint | SELECT rint(12.3456) | struct | | org.apache.spark.sql.catalyst.expressions.Round | round | SELECT round(2.5, 0) | struct | | org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | +| org.apache.spark.sql.catalyst.expressions.ST_AsBinary | st_asbinary | SELECT hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct | +| org.apache.spark.sql.catalyst.expressions.ST_GeogFromWKB | st_geogfromwkb | SELECT hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct | +| org.apache.spark.sql.catalyst.expressions.ST_GeomFromWKB | st_geomfromwkb | SELECT hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040'))) | struct | | org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct | | org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct | | org.apache.spark.sql.catalyst.expressions.SchemaOfXml | schema_of_xml | SELECT schema_of_xml('

1

') | struct1

):string> | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala index 376466fe71e3..46c59bbd6d0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala @@ -36,7 +36,7 @@ class STExpressionsSuite test("ST_AsBinary") { // Test data: WKB representation of POINT(1 2). - val wkb = Hex.unhex("0101000000000000000000f03f0000000000000040".getBytes()) + val wkb = Hex.unhex("0101000000000000000000F03F0000000000000040".getBytes()) val wkbLiteral = Literal.create(wkb, BinaryType) // ST_GeogFromWKB and ST_AsBinary. val geographyExpression = ST_GeogFromWKB(wkbLiteral) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index e90907b904bd..33f128409f7e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -60,7 +60,7 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { "predicate_funcs", "conditional_funcs", "conversion_funcs", "csv_funcs", "datetime_funcs", "generator_funcs", "hash_funcs", "json_funcs", "lambda_funcs", "map_funcs", "math_funcs", "misc_funcs", "string_funcs", "struct_funcs", "window_funcs", "xml_funcs", "table_funcs", - "url_funcs", "variant_funcs").sorted + "url_funcs", "variant_funcs", "st_funcs").sorted val invalidGroupName = "invalid_group_funcs" checkError( exception = intercept[SparkIllegalArgumentException] { From 130d64ce9cfa7f7318581f85795d6b3e8a51f65e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 30 Oct 2025 10:21:03 +0800 Subject: [PATCH 3/3] Apply suggestions from code review --- .../spark/sql/catalyst/expressions/st/stExpressions.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala index 12dac4355741..12f3d13746d5 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala @@ -64,8 +64,8 @@ case class ST_AsBinary(geo: Expression) with UnaryLike[Expression] { override def inputTypes: Seq[AbstractDataType] = Seq( - TypeCollection(GeographyType, GeometryType) - ) + TypeCollection(GeographyType, GeometryType) + ) override lazy val replacement: Expression = StaticInvoke( classOf[STUtils], @@ -80,7 +80,7 @@ case class ST_AsBinary(geo: Expression) override def child: Expression = geo override protected def withNewChildInternal(newChild: Expression): ST_AsBinary = - copy(geo = newChild) + copy(geo = newChild) } /** ST reader expressions. */