Skip to content

Commit

Permalink
[DX] spark sql support GeometryUDT type to create/select based on sedona.
Browse files Browse the repository at this point in the history
  • Loading branch information
freamdx committed May 7, 2024
1 parent 45ba922 commit cf2b0fe
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 1 deletion.
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@
<!-- org.fusesource.leveldbjni will be used except on arm64 platform. -->
<leveldbjni.group>org.fusesource.leveldbjni</leveldbjni.group>
<kubernetes-client.version>5.12.2</kubernetes-client.version>
<sedona.version>1.5.2</sedona.version>

<test.java.home>${java.home}</test.java.home>

Expand Down
5 changes: 5 additions & 0 deletions sql/catalyst/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-netty</artifactId>
</dependency>
<dependency>
<groupId>org.apache.sedona</groupId>
<artifactId>sedona-spark-shaded-3.0_${scala.binary.version}</artifactId>
<version>${sedona.version}</version>
</dependency>
</dependencies>
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
import org.apache.spark.sql.errors.QueryParsingErrors
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
import org.apache.spark.util.Utils.isTesting
Expand Down Expand Up @@ -2676,6 +2677,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
DecimalType(precision.getText.toInt, scale.getText.toInt)
case ("void", Nil) => NullType
case ("interval", Nil) => CalendarIntervalType
case ("geometry", Nil) => GeometryUDT
case (dt @ ("character" | "char" | "varchar"), Nil) =>
throw QueryParsingErrors.charTypeMissingLengthError(dt, ctx)
case (dt, params) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, perio
import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand}
import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

Expand Down Expand Up @@ -130,6 +131,8 @@ object HiveResult {
HIVE_STYLE,
startField,
endField)
case (geom: org.locationtech.jts.geom.Geometry, GeometryUDT) =>
org.apache.sedona.common.utils.GeomUtils.getWKT(geom)
case (other, _: UserDefinedType[_]) => other.toString
}
}
5 changes: 5 additions & 0 deletions sql/hive-thriftserver/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</dependency>
<dependency>
<groupId>org.apache.sedona</groupId>
<artifactId>sedona-spark-shaded-3.0_${scala.binary.version}</artifactId>
<version>${sedona.version}</version>
</dependency>
</dependencies>
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ public TableSchema(Schema schema) {
this(schema.getFieldSchemas());
}

/**
 * Builds the schema from Hive field descriptors plus the original Spark SQL
 * type strings, substituting the mapped Hive type for any recognized UDT
 * (e.g. Sedona geometry/raster types reported as STRING).
 *
 * @param fieldSchemas column name/type/comment descriptors; positions are 1-based
 * @param originTypes  Spark SQL {@code simpleString} type names, parallel to
 *                     {@code fieldSchemas} — assumed same length; TODO confirm callers guarantee this
 */
public TableSchema(List<FieldSchema> fieldSchemas, List<String> originTypes) {
  for (int i = 0; i < fieldSchemas.size(); i++) {
    FieldSchema field = fieldSchemas.get(i);
    // Single lookup instead of existsType() + getType(); falls back to the
    // Hive-reported type when the origin type is not a known UDT.
    String mapped = UDTUtil.getType(originTypes.get(i));
    String sqlType = mapped != null ? mapped : field.getType();
    // ColumnDescriptor positions are 1-based.
    columns.add(new ColumnDescriptor(field.getName(), field.getComment(), new TypeDescriptor(sqlType), i + 1));
  }
}

/** Returns a defensive copy of the column descriptors, preserving order. */
public List<ColumnDescriptor> getColumnDescriptors() {
  return new ArrayList<>(columns);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.apache.hive.service.cli;

import org.apache.hadoop.hive.serde2.thrift.Type;
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT$;
import org.apache.spark.sql.sedona_sql.UDT.RasterUDT$;

import java.util.HashMap;
import java.util.Map;

/**
 * Maps Sedona user-defined type (UDT) {@code simpleString} names to the Hive
 * thrift type name reported to JDBC/ODBC clients. Geometry and raster columns
 * are surfaced as STRING, matching how their values are rendered (e.g. WKT).
 */
final class UDTUtil {
  /** UDT simpleString name -> Hive thrift type name; populated once, read-only after. */
  private static final Map<String, String> UDT_TYPES = new HashMap<>();

  static {
    UDT_TYPES.put(GeometryUDT$.MODULE$.simpleString(), Type.STRING_TYPE.getName());
    UDT_TYPES.put(RasterUDT$.MODULE$.simpleString(), Type.STRING_TYPE.getName());
  }

  /** Utility class — not instantiable. */
  private UDTUtil() {}

  /**
   * Returns whether {@code simpleType} is a known UDT simpleString name.
   */
  static boolean existsType(String simpleType) {
    return UDT_TYPES.containsKey(simpleType);
  }

  /**
   * Returns the Hive type name for {@code simpleType}, or {@code null} if it
   * is not a known UDT.
   */
  static String getType(String simpleType) {
    return UDT_TYPES.get(simpleType);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,6 @@ object SparkExecuteStatementOperation {
}
new FieldSchema(field.name, attrTypeString, field.getComment.getOrElse(""))
}
new TableSchema(schema.asJava)
new TableSchema(schema.asJava, structType.map { field => field.dataType.simpleString }.asJava)
}
}

0 comments on commit cf2b0fe

Please sign in to comment.