Skip to content
Permalink
Browse files
HIVE-26223: Integrate ESRI GeoSpatial UDFs. (#3283). (Ayush Saxena, r…
…eviewed by Mahesh Kumar Behera)
  • Loading branch information
ayushtkn committed May 19, 2022
1 parent a0cdd36 commit 6bfb6f6a3676ac692d92a08b88f439794f20b488
Showing 120 changed files with 10,833 additions and 0 deletions.
@@ -849,6 +849,12 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.esri.geometry</groupId>
<artifactId>esri-geometry-api</artifactId>
<scope>compile</scope>
<version>2.2.4</version>
</dependency>
</dependencies>
<profiles>
<profile>
@@ -1063,6 +1069,7 @@
<include>org.apache.datasketches:*</include>
<include>org.apache.calcite:*</include>
<include>org.apache.calcite.avatica:avatica</include>
<include>com.esri.geometry:esri-geometry-api</include>
</includes>
</artifactSet>
<filters>
@@ -32,6 +32,89 @@
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.hadoop.hive.ql.udf.esri.ST_Aggr_ConvexHull;
import org.apache.hadoop.hive.ql.udf.esri.ST_Aggr_Union;
import org.apache.hadoop.hive.ql.udf.esri.ST_Area;
import org.apache.hadoop.hive.ql.udf.esri.ST_AsBinary;
import org.apache.hadoop.hive.ql.udf.esri.ST_AsGeoJson;
import org.apache.hadoop.hive.ql.udf.esri.ST_AsJson;
import org.apache.hadoop.hive.ql.udf.esri.ST_AsShape;
import org.apache.hadoop.hive.ql.udf.esri.ST_AsText;
import org.apache.hadoop.hive.ql.udf.esri.ST_Bin;
import org.apache.hadoop.hive.ql.udf.esri.ST_BinEnvelope;
import org.apache.hadoop.hive.ql.udf.esri.ST_Boundary;
import org.apache.hadoop.hive.ql.udf.esri.ST_Buffer;
import org.apache.hadoop.hive.ql.udf.esri.ST_Centroid;
import org.apache.hadoop.hive.ql.udf.esri.ST_Contains;
import org.apache.hadoop.hive.ql.udf.esri.ST_ConvexHull;
import org.apache.hadoop.hive.ql.udf.esri.ST_CoordDim;
import org.apache.hadoop.hive.ql.udf.esri.ST_Crosses;
import org.apache.hadoop.hive.ql.udf.esri.ST_Difference;
import org.apache.hadoop.hive.ql.udf.esri.ST_Dimension;
import org.apache.hadoop.hive.ql.udf.esri.ST_Disjoint;
import org.apache.hadoop.hive.ql.udf.esri.ST_Distance;
import org.apache.hadoop.hive.ql.udf.esri.ST_EndPoint;
import org.apache.hadoop.hive.ql.udf.esri.ST_EnvIntersects;
import org.apache.hadoop.hive.ql.udf.esri.ST_Envelope;
import org.apache.hadoop.hive.ql.udf.esri.ST_Equals;
import org.apache.hadoop.hive.ql.udf.esri.ST_ExteriorRing;
import org.apache.hadoop.hive.ql.udf.esri.ST_GeodesicLengthWGS84;
import org.apache.hadoop.hive.ql.udf.esri.ST_GeomCollection;
import org.apache.hadoop.hive.ql.udf.esri.ST_GeomFromGeoJson;
import org.apache.hadoop.hive.ql.udf.esri.ST_GeomFromJson;
import org.apache.hadoop.hive.ql.udf.esri.ST_GeomFromShape;
import org.apache.hadoop.hive.ql.udf.esri.ST_GeomFromText;
import org.apache.hadoop.hive.ql.udf.esri.ST_GeomFromWKB;
import org.apache.hadoop.hive.ql.udf.esri.ST_GeometryN;
import org.apache.hadoop.hive.ql.udf.esri.ST_GeometryProcessing;
import org.apache.hadoop.hive.ql.udf.esri.ST_InteriorRingN;
import org.apache.hadoop.hive.ql.udf.esri.ST_Intersection;
import org.apache.hadoop.hive.ql.udf.esri.ST_Intersects;
import org.apache.hadoop.hive.ql.udf.esri.ST_Is3D;
import org.apache.hadoop.hive.ql.udf.esri.ST_IsClosed;
import org.apache.hadoop.hive.ql.udf.esri.ST_IsEmpty;
import org.apache.hadoop.hive.ql.udf.esri.ST_IsMeasured;
import org.apache.hadoop.hive.ql.udf.esri.ST_IsRing;
import org.apache.hadoop.hive.ql.udf.esri.ST_IsSimple;
import org.apache.hadoop.hive.ql.udf.esri.ST_Length;
import org.apache.hadoop.hive.ql.udf.esri.ST_LineFromWKB;
import org.apache.hadoop.hive.ql.udf.esri.ST_LineString;
import org.apache.hadoop.hive.ql.udf.esri.ST_M;
import org.apache.hadoop.hive.ql.udf.esri.ST_MLineFromWKB;
import org.apache.hadoop.hive.ql.udf.esri.ST_MPointFromWKB;
import org.apache.hadoop.hive.ql.udf.esri.ST_MPolyFromWKB;
import org.apache.hadoop.hive.ql.udf.esri.ST_MaxM;
import org.apache.hadoop.hive.ql.udf.esri.ST_MaxX;
import org.apache.hadoop.hive.ql.udf.esri.ST_MaxY;
import org.apache.hadoop.hive.ql.udf.esri.ST_MaxZ;
import org.apache.hadoop.hive.ql.udf.esri.ST_MinM;
import org.apache.hadoop.hive.ql.udf.esri.ST_MinX;
import org.apache.hadoop.hive.ql.udf.esri.ST_MinY;
import org.apache.hadoop.hive.ql.udf.esri.ST_MinZ;
import org.apache.hadoop.hive.ql.udf.esri.ST_MultiLineString;
import org.apache.hadoop.hive.ql.udf.esri.ST_MultiPoint;
import org.apache.hadoop.hive.ql.udf.esri.ST_MultiPolygon;
import org.apache.hadoop.hive.ql.udf.esri.ST_NumGeometries;
import org.apache.hadoop.hive.ql.udf.esri.ST_NumInteriorRing;
import org.apache.hadoop.hive.ql.udf.esri.ST_NumPoints;
import org.apache.hadoop.hive.ql.udf.esri.ST_Overlaps;
import org.apache.hadoop.hive.ql.udf.esri.ST_Point;
import org.apache.hadoop.hive.ql.udf.esri.ST_PointFromWKB;
import org.apache.hadoop.hive.ql.udf.esri.ST_PointN;
import org.apache.hadoop.hive.ql.udf.esri.ST_PointZ;
import org.apache.hadoop.hive.ql.udf.esri.ST_PolyFromWKB;
import org.apache.hadoop.hive.ql.udf.esri.ST_Polygon;
import org.apache.hadoop.hive.ql.udf.esri.ST_Relate;
import org.apache.hadoop.hive.ql.udf.esri.ST_SRID;
import org.apache.hadoop.hive.ql.udf.esri.ST_SetSRID;
import org.apache.hadoop.hive.ql.udf.esri.ST_StartPoint;
import org.apache.hadoop.hive.ql.udf.esri.ST_SymmetricDiff;
import org.apache.hadoop.hive.ql.udf.esri.ST_Touches;
import org.apache.hadoop.hive.ql.udf.esri.ST_Union;
import org.apache.hadoop.hive.ql.udf.esri.ST_Within;
import org.apache.hadoop.hive.ql.udf.esri.ST_X;
import org.apache.hadoop.hive.ql.udf.esri.ST_Y;
import org.apache.hadoop.hive.ql.udf.esri.ST_Z;
import org.apache.hadoop.hive.ql.exec.FunctionInfo.FunctionResource;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -586,6 +669,94 @@ public final class FunctionRegistry {
system.registerGenericUDF(GenericUDFMaskShowLastN.UDF_NAME, GenericUDFMaskShowLastN.class);
system.registerGenericUDF(GenericUDFMaskHash.UDF_NAME, GenericUDFMaskHash.class);

// GeoSpatial UDFs
system.registerFunction("ST_Length", ST_Length.class);
system.registerFunction("ST_LineString", ST_LineString.class);
system.registerFunction("ST_Point", ST_Point.class);
system.registerFunction("ST_AsText", ST_AsText.class);
system.registerFunction("ST_Aggr_ConvexHull", ST_Aggr_ConvexHull.class);
system.registerFunction("ST_Aggr_Union", ST_Aggr_Union.class);
system.registerFunction("ST_Area", ST_Area.class);
system.registerFunction("ST_AsBinary", ST_AsBinary.class);
system.registerFunction("ST_AsGeoJson", ST_AsGeoJson.class);
system.registerFunction("ST_AsJson", ST_AsJson.class);
system.registerFunction("ST_AsShape", ST_AsShape.class);
system.registerFunction("ST_Bin", ST_Bin.class);
system.registerFunction("ST_BinEnvelope", ST_BinEnvelope.class);
system.registerFunction("ST_Boundary", ST_Boundary.class);
system.registerFunction("ST_Buffer", ST_Buffer.class);
system.registerFunction("ST_Centroid", ST_Centroid.class);
system.registerFunction("ST_Contains", ST_Contains.class);
system.registerFunction("ST_ConvexHull", ST_ConvexHull.class);
system.registerFunction("ST_CoordDim", ST_CoordDim.class);
system.registerFunction("ST_Crosses", ST_Crosses.class);
system.registerFunction("ST_Difference", ST_Difference.class);
system.registerFunction("ST_Dimension", ST_Dimension.class);
system.registerFunction("ST_Disjoint", ST_Disjoint.class);
system.registerFunction("ST_Distance", ST_Distance.class);
system.registerFunction("ST_EndPoint", ST_EndPoint.class);
system.registerFunction("ST_Envelope", ST_Envelope.class);
system.registerFunction("ST_EnvIntersects", ST_EnvIntersects.class);
system.registerFunction("ST_Equals", ST_Equals.class);
system.registerFunction("ST_ExteriorRing", ST_ExteriorRing.class);
system.registerFunction("ST_GeodesicLengthWGS84", ST_GeodesicLengthWGS84.class);
system.registerFunction("ST_GeomCollection", ST_GeomCollection.class);
system.registerFunction("ST_GeometryN", ST_GeometryN.class);
system.registerFunction("ST_GeometryProcessing", ST_GeometryProcessing.class);
system.registerFunction("ST_GeomFromGeoJson", ST_GeomFromGeoJson.class);
system.registerFunction("ST_GeomFromJson", ST_GeomFromJson.class);
system.registerFunction("ST_GeomFromShape", ST_GeomFromShape.class);
system.registerFunction("ST_GeomFromText", ST_GeomFromText.class);
system.registerFunction("ST_GeomFromWKB", ST_GeomFromWKB.class);
system.registerFunction("ST_InteriorRingN", ST_InteriorRingN.class);
system.registerFunction("ST_Intersection", ST_Intersection.class);
system.registerFunction("ST_Intersects", ST_Intersects.class);
system.registerFunction("ST_Is3D", ST_Is3D.class);
system.registerFunction("ST_IsClosed", ST_IsClosed.class);
system.registerFunction("ST_IsEmpty", ST_IsEmpty.class);
system.registerFunction("ST_IsMeasured", ST_IsMeasured.class);
system.registerFunction("ST_IsRing", ST_IsRing.class);
system.registerFunction("ST_IsSimple", ST_IsSimple.class);
system.registerFunction("ST_LineFromWKB", ST_LineFromWKB.class);
system.registerFunction("ST_M", ST_M.class);
system.registerFunction("ST_MaxM", ST_MaxM.class);
system.registerFunction("ST_MaxX", ST_MaxX.class);

system.registerFunction("ST_MaxY", ST_MaxY.class);
system.registerFunction("ST_MaxZ", ST_MaxZ.class);
system.registerFunction("ST_MinM", ST_MinM.class);
system.registerFunction("ST_MinX", ST_MinX.class);
system.registerFunction("ST_MinY", ST_MinY.class);
system.registerFunction("ST_MinZ", ST_MinZ.class);
system.registerFunction("ST_MLineFromWKB", ST_MLineFromWKB.class);
system.registerFunction("ST_MPointFromWKB", ST_MPointFromWKB.class);
system.registerFunction("ST_MPolyFromWKB", ST_MPolyFromWKB.class);
system.registerFunction("ST_MultiLineString", ST_MultiLineString.class);
system.registerFunction("ST_MultiPoint", ST_MultiPoint.class);
system.registerFunction("ST_MultiPolygon", ST_MultiPolygon.class);
system.registerFunction("ST_NumGeometries", ST_NumGeometries.class);
system.registerFunction("ST_NumInteriorRing", ST_NumInteriorRing.class);
system.registerFunction("ST_NumPoints", ST_NumPoints.class);
system.registerFunction("ST_Overlaps", ST_Overlaps.class);
system.registerFunction("ST_PointFromWKB", ST_PointFromWKB.class);
system.registerFunction("ST_PointN", ST_PointN.class);

system.registerFunction("ST_PointZ", ST_PointZ.class);
system.registerFunction("ST_PolyFromWKB", ST_PolyFromWKB.class);
system.registerFunction("ST_Polygon", ST_Polygon.class);
system.registerFunction("ST_Relate", ST_Relate.class);
system.registerFunction("ST_SetSRID", ST_SetSRID.class);
system.registerFunction("ST_SRID", ST_SRID.class);
system.registerFunction("ST_StartPoint", ST_StartPoint.class);
system.registerFunction("ST_SymmetricDiff", ST_SymmetricDiff.class);
system.registerFunction("ST_Touches", ST_Touches.class);
system.registerFunction("ST_Union", ST_Union.class);
system.registerFunction("ST_Within", ST_Within.class);
system.registerFunction("ST_X", ST_X.class);
system.registerFunction("ST_Y", ST_Y.class);
system.registerFunction("ST_Z", ST_Z.class);


try {
system.registerGenericUDF("iceberg_bucket",
(Class<? extends GenericUDF>) Class.forName("org.apache.iceberg.mr.hive.GenericUDFIcebergBucket"));
@@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.udf.esri;

import com.esri.core.geometry.Envelope;

/**
 * Maps 2D coordinates onto a square grid of equally sized bins, where every bin is addressed
 * by a single {@code long} ID, and converts bins back into their bounding envelopes.
 *
 * <p>The grid is centered on the origin: it spans {@code [extentMin, extentMax]} on both axes.
 * Bins are numbered row by row, rows counted downwards from {@code extentMax} and columns
 * counted rightwards from {@code extentMin}, so {@code id = row * numCols + column}.
 *
 * <p>No validation is performed on the bin size or on the coordinates; results for a
 * non-positive bin size or for points outside the extent are unspecified.
 */
public class BinUtils {
  /** Number of bins along one axis of the grid. */
  final long numCols;
  /** Lower bound of the addressable extent on both axes. */
  final double extentMin;
  /** Upper bound of the addressable extent on both axes. */
  final double extentMax;
  /** Width and height of a single bin. */
  final double binSize;

  /**
   * Creates a grid whose bins are {@code binSize} wide and high.
   *
   * @param binSize the width and height of one bin, in coordinate units
   */
  public BinUtils(double binSize) {
    this.binSize = binSize;

    // absolute max number of rows/columns we can have
    long maxBinsPerAxis = (long) Math.sqrt(Long.MAX_VALUE);

    // a smaller binSize gives us a smaller extent width and height that
    // can be addressed by a single 64 bit long
    double size = (binSize < 1) ? maxBinsPerAxis * binSize : maxBinsPerAxis;

    extentMax = size / 2;
    extentMin = extentMax - size;
    numCols = (long) (Math.ceil(size / binSize));
  }

  /**
   * Gets bin ID from a point.
   *
   * @param x the x coordinate of the point
   * @param y the y coordinate of the point
   * @return the ID of the bin the point falls into ({@code row * numCols + column})
   */
  public long getId(double x, double y) {
    double down = (extentMax - y) / binSize;
    double over = (x - extentMin) / binSize;

    return ((long) down * numCols) + (long) over;
  }

  /**
   * Gets the envelope for the bin ID.
   *
   * @param binId the bin ID, as produced by {@link #getId(double, double)}
   * @param envelope the envelope that receives the bin's bounds; it is modified in place
   */
  public void queryEnvelope(long binId, Envelope envelope) {
    long down = binId / numCols;
    long over = binId % numCols;

    setBinEnvelope(down, over, envelope);
  }

  /**
   * Gets the envelope for the bin that contains the x,y coords.
   *
   * @param x the x coordinate of the point
   * @param y the y coordinate of the point
   * @param envelope the envelope that receives the bin's bounds; it is modified in place
   */
  public void queryEnvelope(double x, double y, Envelope envelope) {
    // Truncate to whole row/column indexes, exactly as getId does. Using the fractional
    // offsets here would anchor the envelope at (x, y) itself instead of snapping it to the
    // grid cell that contains the point.
    long down = (long) ((extentMax - y) / binSize);
    long over = (long) ((x - extentMin) / binSize);

    setBinEnvelope(down, over, envelope);
  }

  /** Writes the bounds of the bin at the given row and column into {@code envelope}. */
  private void setBinEnvelope(long down, long over, Envelope envelope) {
    double xmin = extentMin + (over * binSize);
    double xmax = xmin + binSize;
    double ymax = extentMax - (down * binSize);
    double ymin = ymax - binSize;

    envelope.setCoords(xmin, ymin, xmax, ymax);
  }
}

0 comments on commit 6bfb6f6

Please sign in to comment.