From 298a1368f7539fb320157e7a46723e3266cfa005 Mon Sep 17 00:00:00 2001 From: Jia Yu Date: Tue, 7 May 2024 11:44:47 -0700 Subject: [PATCH] Add 1.5.2 --- python/README.md | 2 + python/build_wheel.sh | 2 + python/sedona/core/SpatialRDD/spatial_rdd.py | 2 - python/sedona/core/jvm/config.py | 15 +-- python/sedona/maps/SedonaKepler.py | 10 +- python/sedona/maps/SedonaMapUtils.py | 12 +- python/sedona/maps/SedonaPyDeck.py | 22 +++- python/sedona/raster_utils/SedonaUtils.py | 3 +- python/sedona/spark/SedonaContext.py | 2 +- python/sedona/spark/__init__.py | 14 +-- python/sedona/sql/st_aggregates.py | 10 +- python/sedona/sql/st_constructors.py | 48 ++++--- python/sedona/sql/st_functions.py | 119 ++---------------- python/sedona/sql/st_predicates.py | 17 +-- python/sedona/version.py | 2 +- python/setup.py | 7 +- python/src/geomserde_speedup_module.c | 38 ++++-- .../maps/test_sedonakepler_visualization.py | 20 ++- python/tests/maps/test_sedonapydeck.py | 4 +- python/tests/sql/test_dataframe_api.py | 21 +++- 20 files changed, 166 insertions(+), 204 deletions(-) diff --git a/python/README.md b/python/README.md index 7ce78f6..9447d59 100644 --- a/python/README.md +++ b/python/README.md @@ -1,5 +1,7 @@ # Apache Sedona + + This library is the Python wrapper for Apache Sedona. Apache Sedona is a cluster computing system for processing large-scale spatial data. diff --git a/python/build_wheel.sh b/python/build_wheel.sh index edb2cd7..e565825 100755 --- a/python/build_wheel.sh +++ b/python/build_wheel.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information diff --git a/python/sedona/core/SpatialRDD/spatial_rdd.py b/python/sedona/core/SpatialRDD/spatial_rdd.py index 1522c0e..43917a7 100644 --- a/python/sedona/core/SpatialRDD/spatial_rdd.py +++ b/python/sedona/core/SpatialRDD/spatial_rdd.py @@ -29,7 +29,6 @@ from sedona.core.enums.index_type import IndexTypeJvm, IndexType from sedona.core.enums.spatial import SpatialType from sedona.core.geom.envelope import Envelope -from sedona.core.jvm.config import since from sedona.core.jvm.translate import SedonaPythonConverter, JvmSedonaPythonConverter from sedona.utils.decorators import require from sedona.utils.jvm import JvmStorageLevel @@ -193,7 +192,6 @@ def countWithoutDuplicatesSPRDD(self) -> int: return self._srdd.countWithoutDuplicatesSPRDD() @property - @since("1.0.0") def fieldNames(self) -> List[str]: """ diff --git a/python/sedona/core/jvm/config.py b/python/sedona/core/jvm/config.py index 3350fa8..96bc22e 100644 --- a/python/sedona/core/jvm/config.py +++ b/python/sedona/core/jvm/config.py @@ -18,7 +18,7 @@ import logging import os from re import findall -from typing import Any, Optional, Tuple +from typing import Optional, Tuple from py4j.protocol import Py4JJavaError from pyspark.sql import SparkSession @@ -89,13 +89,11 @@ def decorator(func1): @functools.wraps(func1) def new_func1(*args, **kwargs): - warnings.simplefilter('always', DeprecationWarning) warnings.warn( fmt1.format(name=func1.__name__, reason=reason), category=DeprecationWarning, stacklevel=2 ) - warnings.simplefilter('default', DeprecationWarning) return func1(*args, **kwargs) return new_func1 @@ -121,13 +119,11 @@ def new_func1(*args, **kwargs): @functools.wraps(func2) def new_func2(*args, **kwargs): - warnings.simplefilter('always', DeprecationWarning) warnings.warn( fmt2.format(name=func2.__name__), category=DeprecationWarning, stacklevel=2 ) - warnings.simplefilter('default', DeprecationWarning) return func2(*args, **kwargs) return new_func2 @@ -191,10 +187,9 @@ def get_spark_java_config( try: used_jar_files = java_spark_conf.get(value) - except Py4JJavaError as java_error: - error_message = "Failed to get the value of {} from SparkConf: {}".format( - value, java_error - ) + except Py4JJavaError: + error_message = "Didn't find the value of {} from SparkConf".format(value) + logging.info(error_message) return used_jar_files, error_message @@ -204,7 +199,7 @@ class SedonaMeta: def get_version(cls, spark_jars: str) -> Optional[str]: # Find Spark version, Scala version and Sedona version. versions = findall( - r"sedona-(?:python-adapter|spark-shaded)-([^,\n]{3})_([^,\n]{4})-([^,\n]{5})", + r"sedona-(?:python-adapter|spark-shaded|spark)-([^,\n]{3})_([^,\n]{4})-([^,\n]{5})", spark_jars, ) print(versions) diff --git a/python/sedona/maps/SedonaKepler.py b/python/sedona/maps/SedonaKepler.py index 21f7542..1136992 100644 --- a/python/sedona/maps/SedonaKepler.py +++ b/python/sedona/maps/SedonaKepler.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -from keplergl import KeplerGl from sedona.maps.SedonaMapUtils import SedonaMapUtils @@ -30,6 +29,13 @@ def create_map(cls, df=None, name="unnamed", config=None): dataframe, if a df is passed with no name, a default name of 'unnamed' is set for it. param config: [Optional] A map config to be applied to the rendered map :return: A map object """ + + try: + from keplergl import KeplerGl + except ImportError: + msg = "Install sedona[kepler-map] to convert sedona dataframes to kepler maps." + raise ImportError(msg) from None + kepler_map = KeplerGl() if df is not None: SedonaKepler.add_df(kepler_map, df, name) @@ -48,5 +54,5 @@ def add_df(cls, kepler_map, df, name="unnamed"): :param name: [Optional] Name to assign to the dataframe, default name assigned is 'unnamed' :return: Does not return anything, adds df directly to the given map object """ - geo_df = SedonaMapUtils.__convert_to_gdf__(df) + geo_df = SedonaMapUtils.__convert_to_gdf_or_pdf__(df) kepler_map.add_data(geo_df, name=name) diff --git a/python/sedona/maps/SedonaMapUtils.py b/python/sedona/maps/SedonaMapUtils.py index d50d7e9..5ab875d 100644 --- a/python/sedona/maps/SedonaMapUtils.py +++ b/python/sedona/maps/SedonaMapUtils.py @@ -24,14 +24,20 @@ class SedonaMapUtils: @classmethod - def __convert_to_gdf__(cls, df, rename=True, geometry_col=None): + def __convert_to_gdf_or_pdf__(cls, df, rename=True, geometry_col=None): """ Converts a SedonaDataFrame to a GeoPandasDataFrame and also renames geometry column to a standard name of - 'geometry' :param df: SedonaDataFrame to convert :param geometry_col: [Optional] :return: + 'geometry' + However, if no geometry column is found even after traversing schema, returns a Pandas Dataframe + :param df: SedonaDataFrame to convert + :param geometry_col: [Optional] + :return: GeoPandas Dataframe or Pandas Dataframe """ if geometry_col is None: geometry_col = SedonaMapUtils.__get_geometry_col__(df) pandas_df = df.toPandas() + if geometry_col is None: # No geometry column found even after searching schema, return Pandas Dataframe + return pandas_df geo_df = gpd.GeoDataFrame(pandas_df, geometry=geometry_col) if geometry_col != "geometry" and rename is True: geo_df.rename_geometry("geometry", inplace=True) @@ -44,7 +50,7 @@ def __convert_to_geojson__(cls, df): :param df: SedonaDataFrame to convert :return: GeoJSON object """ - gdf = SedonaMapUtils.__convert_to_gdf__(df) + gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df) gjson_str = gdf.to_json() gjson = json.loads(gjson_str) return gjson diff --git a/python/sedona/maps/SedonaPyDeck.py b/python/sedona/maps/SedonaPyDeck.py index 154b8a4..8270e59 100644 --- a/python/sedona/maps/SedonaPyDeck.py +++ b/python/sedona/maps/SedonaPyDeck.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -import pydeck as pdk +from types import ModuleType from sedona.maps.SedonaMapUtils import SedonaMapUtils @@ -37,6 +37,7 @@ def create_choropleth_map(cls, df, fill_color=None, plot_col=None, initial_view_ :param map_provider: :return: A pydeck Map object with choropleth layer added: """ + pdk = _try_import_pydeck() if initial_view_state is None: gdf = SedonaPyDeck._prepare_df_(df, add_coords=True) @@ -79,6 +80,8 @@ def create_geometry_map(cls, df, fill_color="[85, 183, 177, 255]", line_color="[ :param map_provider: optional map_provider of the pydeck map :return: A pydeck map with a GeoJsonLayer map added """ + pdk = _try_import_pydeck() + geometry_col = SedonaMapUtils.__get_geometry_col__(df) gdf = SedonaPyDeck._prepare_df_(df, geometry_col=geometry_col) geom_type = gdf[geometry_col][0].geom_type @@ -116,6 +119,8 @@ def create_scatterplot_map(cls, df, fill_color="[255, 140, 0]", radius_col=1, ra :param map_provider: optional map_provider to be added to the pydeck map :return: A pydeck map object with a scatterplot layer added """ + pdk = _try_import_pydeck() + gdf = SedonaPyDeck._prepare_df_(df, add_coords=True) layer = pdk.Layer( "ScatterplotLayer", @@ -152,6 +157,7 @@ def create_heatmap(cls, df, color_range=None, weight=1, aggregation="SUM", initi :param map_provider: Optional map_provider for the pydeck map :return: A pydeck map with a heatmap layer added """ + pdk = _try_import_pydeck() gdf = SedonaPyDeck._prepare_df_(df, add_coords=True) @@ -195,7 +201,7 @@ def _prepare_df_(cls, df, add_coords=False, geometry_col=None): """ if geometry_col is None: geometry_col = SedonaMapUtils.__get_geometry_col__(df=df) - gdf = SedonaMapUtils.__convert_to_gdf__(df, rename=False, geometry_col=geometry_col) + gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df, rename=False, geometry_col=geometry_col) if add_coords is True: SedonaPyDeck._create_coord_column_(gdf=gdf, geometry_col=geometry_col) return gdf @@ -239,6 +245,7 @@ def _create_coord_column_(cls, gdf, geometry_col, add_points=False): @classmethod def _create_fat_layer_(cls, gdf, fill_color, line_color, elevation_col): + pdk = _try_import_pydeck() layer = pdk.Layer( 'GeoJsonLayer', # `type` positional argument is here data=gdf, @@ -254,3 +261,14 @@ def _create_fat_layer_(cls, gdf, fill_color, line_color, elevation_col): ) return layer + + +def _try_import_pydeck() -> ModuleType: + try: + import pydeck as pdk + + except ImportError: + msg = "Install sedona[pydeck-map] to convert sedona dataframes to pydeck maps." + raise ImportError(msg) from None + + return pdk diff --git a/python/sedona/raster_utils/SedonaUtils.py b/python/sedona/raster_utils/SedonaUtils.py index 7983566..4dbf9e3 100644 --- a/python/sedona/raster_utils/SedonaUtils.py +++ b/python/sedona/raster_utils/SedonaUtils.py @@ -15,9 +15,8 @@ # specific language governing permissions and limitations # under the License. -from IPython.display import display, HTML - class SedonaUtils: @classmethod def display_image(cls, df): + from IPython.display import display, HTML display(HTML(df.toPandas().to_html(escape=False))) diff --git a/python/sedona/spark/SedonaContext.py b/python/sedona/spark/SedonaContext.py index 76be957..cda98a6 100644 --- a/python/sedona/spark/SedonaContext.py +++ b/python/sedona/spark/SedonaContext.py @@ -35,7 +35,7 @@ def create(cls, spark: SparkSession) -> SparkSession: """ spark.sql("SELECT 1 as geom").count() PackageImporter.import_jvm_lib(spark._jvm) - spark._jvm.SedonaContext.create(spark._jsparkSession) + spark._jvm.SedonaContext.create(spark._jsparkSession, "python") return spark @classmethod diff --git a/python/sedona/spark/__init__.py b/python/sedona/spark/__init__.py index 73c4b2d..c98234a 100644 --- a/python/sedona/spark/__init__.py +++ b/python/sedona/spark/__init__.py @@ -41,11 +41,9 @@ from sedona.register import SedonaRegistrator from sedona.spark.SedonaContext import SedonaContext from sedona.raster_utils.SedonaUtils import SedonaUtils -try: - from sedona.maps.SedonaKepler import SedonaKepler -except: - print('Skipping SedonaKepler import, verify if keplergl is installed') -try: - from sedona.maps.SedonaPyDeck import SedonaPyDeck -except: - print('Skipping SedonaPyDeck import, verify if pydeck is installed') +from sedona.maps.SedonaKepler import SedonaKepler +from sedona.maps.SedonaPyDeck import SedonaPyDeck +from sedona.sql.st_aggregates import * +from sedona.sql.st_constructors import * +from sedona.sql.st_functions import * +from sedona.sql.st_predicates import * diff --git a/python/sedona/sql/st_aggregates.py b/python/sedona/sql/st_aggregates.py index 4fca398..184ed38 100644 --- a/python/sedona/sql/st_aggregates.py +++ b/python/sedona/sql/st_aggregates.py @@ -14,6 +14,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import inspect +import sys from functools import partial @@ -23,11 +25,9 @@ _call_aggregate_function = partial(call_sedona_function, "st_aggregates") -__all__ = [ - "ST_Envelope_Aggr", - "ST_Intersection_Aggr", - "ST_Union_Aggr", -] +# Automatically populate __all__ +__all__ = [name for name, obj in inspect.getmembers(sys.modules[__name__]) + if inspect.isfunction(obj)] @validate_argument_types diff --git a/python/sedona/sql/st_constructors.py b/python/sedona/sql/st_constructors.py index 4195a6f..ae9237a 100644 --- a/python/sedona/sql/st_constructors.py +++ b/python/sedona/sql/st_constructors.py @@ -14,6 +14,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import inspect +import sys from functools import partial from typing import Optional, Union @@ -23,25 +25,9 @@ from sedona.sql.dataframe_api import ColumnOrName, ColumnOrNameOrNumber, call_sedona_function, validate_argument_types -__all__ = [ - "ST_GeomFromGeoHash", - "ST_GeomFromGeoJSON", - "ST_GeomFromGML", - "ST_GeomFromKML", - "ST_GeomFromText", - "ST_GeomFromWKB", - "ST_GeomFromWKT", - "ST_GeomFromEWKT", - "ST_LineFromText", - "ST_LineStringFromText", - "ST_Point", - "ST_PointFromText", - "ST_MakePoint" - "ST_PolygonFromEnvelope", - "ST_PolygonFromText", - "ST_MLineFromText", - "ST_MPolyFromText" -] +# Automatically populate __all__ +__all__ = [name for name, obj in inspect.getmembers(sys.modules[__name__]) + if inspect.isfunction(obj)] _call_constructor_function = partial(call_sedona_function, "st_constructors") @@ -98,7 +84,7 @@ def ST_GeomFromKML(kml_string: ColumnOrName) -> Column: @validate_argument_types -def ST_GeomFromText(wkt: ColumnOrName) -> Column: +def ST_GeomFromText(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None) -> Column: """Generate a geometry column from a Well-Known Text (WKT) string column. This is an alias of ST_GeomFromWKT. @@ -107,7 +93,9 @@ def ST_GeomFromText(wkt: ColumnOrName) -> Column: :return: Geometry column representing the WKT string. :rtype: Column """ - return _call_constructor_function("ST_GeomFromText", wkt) + args = (wkt) if srid is None else (wkt, srid) + + return _call_constructor_function("ST_GeomFromText", args) @validate_argument_types @@ -123,7 +111,7 @@ def ST_GeomFromWKB(wkb: ColumnOrName) -> Column: @validate_argument_types -def ST_GeomFromWKT(wkt: ColumnOrName) -> Column: +def ST_GeomFromWKT(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None) -> Column: """Generate a geometry column from a Well-Known Text (WKT) string column. This is an alias of ST_GeomFromText. @@ -132,7 +120,9 @@ def ST_GeomFromWKT(wkt: ColumnOrName) -> Column: :return: Geometry column representing the WKT string. :rtype: Column """ - return _call_constructor_function("ST_GeomFromWKT", wkt) + args = (wkt) if srid is None else (wkt, srid) + + return _call_constructor_function("ST_GeomFromWKT", args) @validate_argument_types def ST_GeomFromEWKT(ewkt: ColumnOrName) -> Column: @@ -275,7 +265,7 @@ def ST_PolygonFromText(coords: ColumnOrName, delimiter: ColumnOrName) -> Column: return _call_constructor_function("ST_PolygonFromText", (coords, delimiter)) @validate_argument_types -def ST_MPolyFromText(wkt: ColumnOrName) -> Column: +def ST_MPolyFromText(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None) -> Column: """Generate multiPolygon geometry from a multiPolygon WKT representation. :param wkt: multiPolygon WKT string column to generate from. @@ -283,10 +273,12 @@ def ST_MPolyFromText(wkt: ColumnOrName) -> Column: :return: multiPolygon geometry generated from the wkt column. :rtype: Column """ - return _call_constructor_function("ST_MPolyFromText", wkt) + args = (wkt) if srid is None else (wkt, srid) + + return _call_constructor_function("ST_MPolyFromText", args) @validate_argument_types -def ST_MLineFromText(wkt: ColumnOrName) -> Column: +def ST_MLineFromText(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None) -> Column: """Generate multiLineString geometry from a multiLineString WKT representation. :param wkt: multiLineString WKT string column to generate from. @@ -294,4 +286,6 @@ def ST_MLineFromText(wkt: ColumnOrName) -> Column: :return: multiLineString geometry generated from the wkt column. :rtype: Column """ - return _call_constructor_function("ST_MLineFromText", wkt) + args = (wkt) if srid is None else (wkt, srid) + + return _call_constructor_function("ST_MLineFromText", args) diff --git a/python/sedona/sql/st_functions.py b/python/sedona/sql/st_functions.py index aad15ea..808e36d 100644 --- a/python/sedona/sql/st_functions.py +++ b/python/sedona/sql/st_functions.py @@ -14,6 +14,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import inspect +import sys from functools import partial from typing import Optional, Union @@ -23,115 +25,9 @@ from sedona.sql.dataframe_api import call_sedona_function, ColumnOrName, ColumnOrNameOrNumber, validate_argument_types -__all__ = [ - "GeometryType", - "ST_3DDistance", - "ST_AddPoint", - "ST_Area", - "ST_AreaSpheroid", - "ST_AsBinary", - "ST_AsEWKB", - "ST_AsEWKT", - "ST_AsGeoJSON", - "ST_AsGML", - "ST_AsKML", - "ST_AsText", - "ST_Azimuth", - "ST_Boundary", - "ST_Buffer", - "ST_BuildArea", - "ST_Centroid", - "ST_Collect", - "ST_CollectionExtract", - "ST_ClosestPoint", - "ST_ConcaveHull", - "ST_ConvexHull", - "ST_Difference", - "ST_Dimension", - "ST_Distance", - "ST_DistanceSphere", - "ST_DistanceSpheroid", - "ST_Dump", - "ST_DumpPoints", - "ST_EndPoint", - "ST_Envelope", - "ST_ExteriorRing", - "ST_FlipCoordinates", - "ST_Force_2D", - "ST_GeoHash", - "ST_GeometricMedian", - "ST_GeometryN", - "ST_GeometryType", - "ST_H3CellDistance", - "ST_H3CellIDs", - "ST_H3KRing", - "ST_H3ToGeom", - "ST_InteriorRingN", - "ST_Intersection", - "ST_IsClosed", - "ST_IsEmpty", - "ST_IsRing", - "ST_IsSimple", - "ST_IsValid", - "ST_IsValidReason", - "ST_Length", - "ST_LengthSpheroid", - "ST_LineFromMultiPoint", - "ST_LineInterpolatePoint", - "ST_LineLocatePoint", - "ST_LineMerge", - "ST_LineSubstring", - "ST_MakeLine", - "ST_Polygon" - "ST_MakePolygon", - "ST_MakeValid", - "ST_MinimumBoundingCircle", - "ST_MinimumBoundingRadius", - "ST_Multi", - "ST_Normalize", - "ST_NPoints", - "ST_NDims", - "ST_NumGeometries", - "ST_NumInteriorRings", - "ST_PointN", - "ST_PointOnSurface", - "ST_ReducePrecision", - "ST_RemovePoint", - "ST_Reverse", - "ST_S2CellIDs", - "ST_SetPoint", - "ST_SetSRID", - "ST_SRID", - "ST_Split", - "ST_StartPoint", - "ST_SubDivide", - "ST_SubDivideExplode", - "ST_SimplifyPreserveTopology", - "ST_SymDifference", - "ST_Transform", - "ST_Union", - "ST_X", - "ST_XMax", - "ST_XMin", - "ST_Y", - "ST_YMax", - "ST_YMin", - "ST_Z", - "ST_ZMax", - "ST_ZMin", - "ST_NumPoints", - "ST_Force3D", - "ST_NRings", - "ST_Translate", - "ST_VoronoiPolygons", - "ST_Angle", - "ST_Degrees", - "ST_FrechetDistance", - "ST_CoordDim", - "ST_IsCollection", - "ST_Affine", - "ST_BoundingDiagonal" -] +# Automatically populate __all__ +__all__ = [name for name, obj in inspect.getmembers(sys.modules[__name__]) + if inspect.isfunction(obj)] _call_st_function = partial(call_sedona_function, "st_functions") @@ -1257,7 +1153,7 @@ def ST_Split(input: ColumnOrName, blade: ColumnOrName) -> Column: :return: Multi-geometry representing the split of input by blade. :rtype: Column """ - return _call_st_function("ST_SymDifference", (input, blade)) + return _call_st_function("ST_Split", (input, blade)) @validate_argument_types @@ -1480,6 +1376,7 @@ def ST_VoronoiPolygons(geometry: ColumnOrName, tolerance: Optional[Union[ColumnO args = (geometry, tolerance, extendTo) return _call_st_function("ST_VoronoiPolygons", args) +@validate_argument_types def ST_FrechetDistance(g1: ColumnOrName, g2: ColumnOrName) -> Column: """ Computes discrete frechet distance between the two geometries. @@ -1522,7 +1419,7 @@ def ST_Affine(geometry: ColumnOrName, a: Union[ColumnOrName, float], b: Union[Co args = (geometry, a, b, c, d, e, f, g, h, i, xOff, yOff, zOff) return _call_st_function("ST_Affine", args) - +@validate_argument_types def ST_BoundingDiagonal(geometry: ColumnOrName) -> Column: """ Returns a LineString with the min/max values of each dimension of the bounding box of the given geometry as its diff --git a/python/sedona/sql/st_predicates.py b/python/sedona/sql/st_predicates.py index 8f31918..48a4411 100644 --- a/python/sedona/sql/st_predicates.py +++ b/python/sedona/sql/st_predicates.py @@ -14,6 +14,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import inspect +import sys from functools import partial @@ -23,18 +25,9 @@ from sedona.sql.dataframe_api import ColumnOrName, call_sedona_function, validate_argument_types -__all__ = [ - "ST_Contains", - "ST_Crosses", - "ST_Disjoint", - "ST_Equals", - "ST_Intersects", - "ST_OrderingEquals", - "ST_Overlaps", - "ST_Touches", - "ST_Within", - "ST_DWithin" -] +# Automatically populate __all__ +__all__ = [name for name, obj in inspect.getmembers(sys.modules[__name__]) + if inspect.isfunction(obj)] _call_predicate_function = partial(call_sedona_function, "st_predicates") diff --git a/python/sedona/version.py b/python/sedona/version.py index 07fb50d..9778530 100644 --- a/python/sedona/version.py +++ b/python/sedona/version.py @@ -15,4 +15,4 @@ # specific language governing permissions and limitations # under the License. -version = "1.5.1" +version = "1.5.2" diff --git a/python/setup.py b/python/setup.py index a6bc7b2..7576957 100644 --- a/python/setup.py +++ b/python/setup.py @@ -53,7 +53,12 @@ long_description_content_type="text/markdown", python_requires='>=3.6', install_requires=['attrs', "shapely>=1.7.0"], - extras_require={"spark": ['pyspark>=2.3.0']}, + extras_require={ + "spark": ["pyspark>=2.3.0"], + "pydeck-map": ["pandas<=1.3.5", "geopandas<=0.10.2", "pydeck==0.8.0"], + "kepler-map": ["pandas<=1.3.5", "geopandas<=0.10.2", "keplergl==0.3.2"], + "all": ["pyspark>=2.3.0", "pandas<=1.3.5", "geopandas<=0.10.2","pydeck==0.8.0", "keplergl==0.3.2"], + }, project_urls={ 'Documentation': 'https://sedona.apache.org', 'Source code': 'https://github.com/apache/sedona', diff --git a/python/src/geomserde_speedup_module.c b/python/src/geomserde_speedup_module.c index e49efb4..2edf4b7 100644 --- a/python/src/geomserde_speedup_module.c +++ b/python/src/geomserde_speedup_module.c @@ -192,21 +192,33 @@ static GEOSGeometry *do_deserialize(PyObject *args, /* serialize/deserialize functions for Shapely 2.x */ static PyObject *serialize(PyObject *self, PyObject *args) { - PyObject *pygeos_geom = NULL; - if (!PyArg_ParseTuple(args, "O", &pygeos_geom)) { - return NULL; - } + PyObject *pygeos_geom = NULL; + if (!PyArg_ParseTuple(args, "O", &pygeos_geom)) { + return NULL; // Argument parsing failed; error already set by PyArg_ParseTuple + } - GEOSGeometry *geos_geom = NULL; - char success = PyGEOS_GetGEOSGeometry(pygeos_geom, &geos_geom); - if (success == 0) { - PyErr_SetString( - PyExc_TypeError, - "Argument is of incorrect type. Please provide only Geometry objects."); - return NULL; - } + GEOSGeometry *geos_geom = NULL; + char success = PyGEOS_GetGEOSGeometry(pygeos_geom, &geos_geom); + if (success == 0) { + // Retrieve the type of the supplied object + PyObject *type = (PyObject *)Py_TYPE(pygeos_geom); + PyObject *type_name = PyObject_GetAttrString(type, "__name__"); + if (type_name == NULL) { + // Fallback error if we can't get the type name + PyErr_SetString(PyExc_TypeError, "Argument is of incorrect type."); + } else { + // Construct the error message with the type name + const char *type_str = PyUnicode_AsUTF8(type_name); + char error_msg[256]; + snprintf(error_msg, sizeof(error_msg), "Argument is of incorrect type: '%s'. Please provide only Geometry objects.", type_str); + + PyErr_SetString(PyExc_TypeError, error_msg); + Py_DECREF(type_name); // Cleanup the reference to type_name + } + return NULL; + } - return do_serialize(geos_geom); + return do_serialize(geos_geom); } static PyObject *deserialize(PyObject *self, PyObject *args) { diff --git a/python/tests/maps/test_sedonakepler_visualization.py b/python/tests/maps/test_sedonakepler_visualization.py index b636fbf..fc8ae32 100644 --- a/python/tests/maps/test_sedonakepler_visualization.py +++ b/python/tests/maps/test_sedonakepler_visualization.py @@ -21,6 +21,7 @@ from tests import mixed_wkt_geometry_input_location from tests import csv_point_input_location import geopandas as gpd +from pyspark.sql.functions import explode, hex class TestVisualization(TestBase): @@ -70,6 +71,22 @@ def test_df_addition(self): assert sedona_kepler_empty_map._repr_html_() == kepler_map._repr_html_() assert sedona_kepler_empty_map.config == kepler_map.config + def test_pandas_df_addition(self): + polygon_wkt_df = self.spark.read.format("csv"). \ + option("delimiter", "\t"). \ + option("header", "false"). \ + load(mixed_wkt_geometry_input_location) + + polygon_wkt_df.createOrReplaceTempView("polygontable") + polygon_h3_df = self.spark.sql( + "select ST_H3CellIDs(ST_GeomFromWKT(polygontable._c0), 3, false) as h3_cellID from polygontable") + polygon_exploded_h3 = polygon_h3_df.select(explode(polygon_h3_df.h3_cellID).alias("h3")) + polygon_hex_exploded_h3 = polygon_exploded_h3.select(hex(polygon_exploded_h3.h3).alias("hex_h3")) + kepler_map = SedonaKepler.create_map(df=polygon_hex_exploded_h3, name="h3") + + # just test if the map creation is successful. + assert kepler_map is not None + def test_adding_multiple_datasets(self): config = {'version': 'v1', 'config': {'visState': {'filters': [], @@ -180,7 +197,8 @@ def test_adding_multiple_datasets(self): load(csv_point_input_location) point_csv_df.createOrReplaceTempView("pointtable") - point_df = self.spark.sql("select ST_Point(cast(pointtable._c0 as Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable") + point_df = self.spark.sql( + "select ST_Point(cast(pointtable._c0 as Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable") polygon_wkt_df.createOrReplaceTempView("polygontable") polygon_df = self.spark.sql("select ST_GeomFromWKT(polygontable._c0) as countyshape from polygontable") diff --git a/python/tests/maps/test_sedonapydeck.py b/python/tests/maps/test_sedonapydeck.py index e7d7c0f..7046f54 100644 --- a/python/tests/maps/test_sedonapydeck.py +++ b/python/tests/maps/test_sedonapydeck.py @@ -109,7 +109,7 @@ def testScatterplotMap(self): p_map = pdk.Deck(layers=[layer]) sedona_pydeck_map = SedonaPyDeck.create_scatterplot_map(df=chicago_crimes_df) - assert self.isMapEqual(sedona_map=sedona_pydeck_map, pydeck_map=p_map) == True + assert self.isMapEqual(sedona_map=sedona_pydeck_map, pydeck_map=p_map) def testHeatmap(self): chicago_crimes_csv_df = self.spark.read.format("csv"). \ @@ -144,7 +144,7 @@ def testHeatmap(self): p_map = pdk.Deck(layers=[layer]) sedona_pydeck_map = SedonaPyDeck.create_heatmap(df=chicago_crimes_df) - assert self.isMapEqual(sedona_map=sedona_pydeck_map, pydeck_map=p_map) == True + assert self.isMapEqual(sedona_map=sedona_pydeck_map, pydeck_map=p_map) def isMapEqual(self, pydeck_map, sedona_map): sedona_dict = json.loads(sedona_map.to_json()) diff --git a/python/tests/sql/test_dataframe_api.py b/python/tests/sql/test_dataframe_api.py index f732bc6..86018c0 100644 --- a/python/tests/sql/test_dataframe_api.py +++ b/python/tests/sql/test_dataframe_api.py @@ -21,6 +21,11 @@ import pytest from shapely.geometry.base import BaseGeometry +from sedona.sql.st_aggregates import * +from sedona.sql.st_constructors import * +from sedona.sql.st_functions import * +from sedona.sql.st_predicates import * + from sedona.sql import ( st_aggregates as sta, st_constructors as stc, @@ -38,12 +43,20 @@ (stc.ST_GeomFromGML, ("gml",), "constructor", "", "LINESTRING (-71.16 42.25, -71.17 42.25, -71.18 42.25)"), (stc.ST_GeomFromKML, ("kml",), "constructor", "", "LINESTRING (-71.16 42.26, -71.17 42.26)"), (stc.ST_GeomFromText, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), + (stc.ST_GeomFromText, ("wkt",4326), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), (stc.ST_GeomFromWKB, ("wkb",), "constructor", "ST_ReducePrecision(geom, 2)", "LINESTRING (-2.1 -0.35, -1.5 -0.67)"), (stc.ST_GeomFromWKT, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), + (stc.ST_GeomFromWKT, ("wkt",4326), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), (stc.ST_GeomFromEWKT, ("ewkt",), "linestring_ewkt", "", "LINESTRING (1 2, 3 4)"), (stc.ST_LineFromText, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), (stc.ST_LineStringFromText, ("multiple_point", lambda: f.lit(',')), "constructor", "", "LINESTRING (0 0, 1 0, 1 1, 0 0)"), (stc.ST_Point, ("x", "y"), "constructor", "", "POINT (0 1)"), + (stc.ST_PointZ, ("x", "y", "z", 4326), "constructor", "", "POINT Z (0 1 2)"), + (stc.ST_PointZ, ("x", "y", "z"), "constructor", "", "POINT Z (0 1 2)"), + (stc.ST_MPolyFromText, ("mpoly",), "constructor", "" , "MULTIPOLYGON (((0 0, 20 0, 20 20, 0 20, 0 0), (5 5, 5 7, 7 7, 7 5, 5 5)))"), + (stc.ST_MPolyFromText, ("mpoly", 4326), "constructor", "" , "MULTIPOLYGON (((0 0, 20 0, 20 20, 0 20, 0 0), (5 5, 5 7, 7 7, 7 5, 5 5)))"), + (stc.ST_MLineFromText, ("mline", ), "constructor", "" , "MULTILINESTRING ((1 2, 3 4), (4 5, 6 7))"), + (stc.ST_MLineFromText, ("mline", 4326), "constructor", "" , "MULTILINESTRING ((1 2, 3 4), (4 5, 6 7))"), (stc.ST_PointFromText, ("single_point", lambda: f.lit(',')), "constructor", "", "POINT (0 1)"), (stc.ST_MakePoint, ("x", "y", "z"), "constructor", "", "POINT Z (0 1 2)"), (stc.ST_PolygonFromEnvelope, ("minx", "miny", "maxx", "maxy"), "min_max_x_y", "", "POLYGON ((0 1, 0 3, 2 3, 2 1, 0 1))"), @@ -144,7 +157,7 @@ (stf.ST_SetPoint, ("line", 1, lambda: f.expr("ST_Point(1.0, 1.0)")), "linestring_geom", "", "LINESTRING (0 0, 1 1, 2 0, 3 0, 4 0, 5 0)"), (stf.ST_SetSRID, ("point", 3021), "point_geom", "ST_SRID(geom)", 3021), (stf.ST_SimplifyPreserveTopology, ("geom", 0.2), "0.9_poly", "", "POLYGON ((0 0, 1 0, 1 1, 0 0))"), - (stf.ST_Split, ("a", "b"), "overlapping_polys", "", "MULTIPOLYGON (((1 0, 0 0, 0 1, 1 1, 1 0)), ((2 0, 2 1, 3 1, 3 0, 2 0)))"), + (stf.ST_Split, ("line", "points"), "multipoint_splitting_line", "", "MULTILINESTRING ((0 0, 0.5 0.5), (0.5 0.5, 1 1), (1 1, 1.5 1.5, 2 2))"), (stf.ST_SRID, ("point",), "point_geom", "", 0), (stf.ST_StartPoint, ("line",), "linestring_geom", "", "POINT (0 0)"), (stf.ST_SubDivide, ("line", 5), "linestring_geom", "", ["LINESTRING (0 0, 2.5 0)", "LINESTRING (2.5 0, 5 0)"]), @@ -355,6 +368,8 @@ class TestDataFrameAPI(TestBase): @pytest.fixture def base_df(self, request): wkb = '0102000000020000000000000084d600c00000000080b5d6bf00000060e1eff7bf00000080075de5bf' + mpoly = 'MULTIPOLYGON(((0 0 ,20 0 ,20 20 ,0 20 ,0 0 ),(5 5 ,5 7 ,7 7 ,7 5 ,5 5)))' + mline = 'MULTILINESTRING((1 2, 3 4), (4 5, 6 7))' geojson = "{ \"type\": \"Feature\", \"properties\": { \"prop\": \"01\" }, \"geometry\": { \"type\": \"Point\", \"coordinates\": [ 0.0, 1.0 ] }}," gml_string = "-71.16,42.25 -71.17,42.25 -71.18,42.25" kml_string = "-71.16,42.26 -71.17,42.26" @@ -367,6 +382,8 @@ def base_df(self, request): "'0.0,1.0' AS single_point", "'0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0' AS multiple_point", f"X'{wkb}' AS wkb", + f"'{mpoly}' AS mpoly", + f"'{mline}' AS mline", f"'{geojson}' AS geojson", "'s00twy01mt' AS geohash", f"'{gml_string}' AS gml", @@ -434,6 +451,8 @@ def base_df(self, request): return TestDataFrameAPI.spark.sql("SELECT ST_GeomFromWKT('POINT (0.0 1.0)') AS point, ST_GeomFromWKT('LINESTRING (0 0, 1 0, 2 0, 3 0, 4 0, 5 0)') AS line") elif request.param == "line_and_point": return TestDataFrameAPI.spark.sql("SELECT ST_GeomFromWKT('LINESTRING (0 2, 1 1, 2 0)') AS line, ST_GeomFromWKT('POINT (0 0)') AS point") + elif request.param == "multipoint_splitting_line": + return TestDataFrameAPI.spark.sql("SELECT ST_GeomFromWKT('LINESTRING (0 0, 1.5 1.5, 2 2)') AS line, ST_GeomFromWKT('MULTIPOINT (0.5 0.5, 1 1)') AS points") elif request.param == "origin_and_point": return TestDataFrameAPI.spark.sql("SELECT ST_GeomFromWKT('POINT (0 0)') AS origin, ST_GeomFromWKT('POINT (1 0)') as point") raise ValueError(f"Invalid base_df name passed: {request.param}")