Skip to content

Commit

Permalink
Release Apache Sedona 1.5.2 (#4)
Browse files Browse the repository at this point in the history
  • Loading branch information
jiayuasu committed May 7, 2024
1 parent b91f3b2 commit 819cd69
Show file tree
Hide file tree
Showing 20 changed files with 166 additions and 204 deletions.
2 changes: 2 additions & 0 deletions python/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Apache Sedona

<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=acc24e73-991b-4e92-8a6d-e33f333a645d" />

This library is the Python wrapper for Apache Sedona.

Apache Sedona is a cluster computing system for processing large-scale spatial data.
Expand Down
2 changes: 2 additions & 0 deletions python/build_wheel.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
Expand Down
2 changes: 0 additions & 2 deletions python/sedona/core/SpatialRDD/spatial_rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from sedona.core.enums.index_type import IndexTypeJvm, IndexType
from sedona.core.enums.spatial import SpatialType
from sedona.core.geom.envelope import Envelope
from sedona.core.jvm.config import since
from sedona.core.jvm.translate import SedonaPythonConverter, JvmSedonaPythonConverter
from sedona.utils.decorators import require
from sedona.utils.jvm import JvmStorageLevel
Expand Down Expand Up @@ -193,7 +192,6 @@ def countWithoutDuplicatesSPRDD(self) -> int:
return self._srdd.countWithoutDuplicatesSPRDD()

@property
@since("1.0.0")
def fieldNames(self) -> List[str]:
"""
Expand Down
15 changes: 5 additions & 10 deletions python/sedona/core/jvm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import logging
import os
from re import findall
from typing import Any, Optional, Tuple
from typing import Optional, Tuple

from py4j.protocol import Py4JJavaError
from pyspark.sql import SparkSession
Expand Down Expand Up @@ -89,13 +89,11 @@ def decorator(func1):

@functools.wraps(func1)
def new_func1(*args, **kwargs):
warnings.simplefilter('always', DeprecationWarning)
warnings.warn(
fmt1.format(name=func1.__name__, reason=reason),
category=DeprecationWarning,
stacklevel=2
)
warnings.simplefilter('default', DeprecationWarning)
return func1(*args, **kwargs)

return new_func1
Expand All @@ -121,13 +119,11 @@ def new_func1(*args, **kwargs):

@functools.wraps(func2)
def new_func2(*args, **kwargs):
warnings.simplefilter('always', DeprecationWarning)
warnings.warn(
fmt2.format(name=func2.__name__),
category=DeprecationWarning,
stacklevel=2
)
warnings.simplefilter('default', DeprecationWarning)
return func2(*args, **kwargs)

return new_func2
Expand Down Expand Up @@ -191,10 +187,9 @@ def get_spark_java_config(

try:
used_jar_files = java_spark_conf.get(value)
except Py4JJavaError as java_error:
error_message = "Failed to get the value of {} from SparkConf: {}".format(
value, java_error
)
except Py4JJavaError:
error_message = "Didn't find the value of {} from SparkConf".format(value)
logging.info(error_message)

return used_jar_files, error_message

Expand All @@ -204,7 +199,7 @@ class SedonaMeta:
def get_version(cls, spark_jars: str) -> Optional[str]:
# Find Spark version, Scala version and Sedona version.
versions = findall(
r"sedona-(?:python-adapter|spark-shaded)-([^,\n]{3})_([^,\n]{4})-([^,\n]{5})",
r"sedona-(?:python-adapter|spark-shaded|spark)-([^,\n]{3})_([^,\n]{4})-([^,\n]{5})",
spark_jars,
)
print(versions)
Expand Down
10 changes: 8 additions & 2 deletions python/sedona/maps/SedonaKepler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# specific language governing permissions and limitations
# under the License.

from keplergl import KeplerGl
from sedona.maps.SedonaMapUtils import SedonaMapUtils


Expand All @@ -30,6 +29,13 @@ def create_map(cls, df=None, name="unnamed", config=None):
dataframe, if a df is passed with no name, a default name of 'unnamed' is set for it.
param config: [Optional] A map config to be applied to the rendered map :return: A map object
"""

try:
from keplergl import KeplerGl
except ImportError:
msg = "Install sedona[kepler-map] to convert sedona dataframes to kepler maps."
raise ImportError(msg) from None

kepler_map = KeplerGl()
if df is not None:
SedonaKepler.add_df(kepler_map, df, name)
Expand All @@ -48,5 +54,5 @@ def add_df(cls, kepler_map, df, name="unnamed"):
:param name: [Optional] Name to assign to the dataframe, default name assigned is 'unnamed'
:return: Does not return anything, adds df directly to the given map object
"""
geo_df = SedonaMapUtils.__convert_to_gdf__(df)
geo_df = SedonaMapUtils.__convert_to_gdf_or_pdf__(df)
kepler_map.add_data(geo_df, name=name)
12 changes: 9 additions & 3 deletions python/sedona/maps/SedonaMapUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,20 @@
class SedonaMapUtils:

@classmethod
def __convert_to_gdf__(cls, df, rename=True, geometry_col=None):
def __convert_to_gdf_or_pdf__(cls, df, rename=True, geometry_col=None):
"""
Converts a SedonaDataFrame to a GeoPandasDataFrame and also renames geometry column to a standard name of
'geometry' :param df: SedonaDataFrame to convert :param geometry_col: [Optional] :return:
'geometry'
However, if no geometry column is found even after traversing schema, returns a Pandas Dataframe
:param df: SedonaDataFrame to convert
:param geometry_col: [Optional]
:return: GeoPandas Dataframe or Pandas Dataframe
"""
if geometry_col is None:
geometry_col = SedonaMapUtils.__get_geometry_col__(df)
pandas_df = df.toPandas()
if geometry_col is None: # No geometry column found even after searching schema, return Pandas Dataframe
return pandas_df
geo_df = gpd.GeoDataFrame(pandas_df, geometry=geometry_col)
if geometry_col != "geometry" and rename is True:
geo_df.rename_geometry("geometry", inplace=True)
Expand All @@ -44,7 +50,7 @@ def __convert_to_geojson__(cls, df):
:param df: SedonaDataFrame to convert
:return: GeoJSON object
"""
gdf = SedonaMapUtils.__convert_to_gdf__(df)
gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df)
gjson_str = gdf.to_json()
gjson = json.loads(gjson_str)
return gjson
Expand Down
22 changes: 20 additions & 2 deletions python/sedona/maps/SedonaPyDeck.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

import pydeck as pdk
from types import ModuleType
from sedona.maps.SedonaMapUtils import SedonaMapUtils


Expand All @@ -37,6 +37,7 @@ def create_choropleth_map(cls, df, fill_color=None, plot_col=None, initial_view_
:param map_provider:
:return: A pydeck Map object with choropleth layer added:
"""
pdk = _try_import_pydeck()

if initial_view_state is None:
gdf = SedonaPyDeck._prepare_df_(df, add_coords=True)
Expand Down Expand Up @@ -79,6 +80,8 @@ def create_geometry_map(cls, df, fill_color="[85, 183, 177, 255]", line_color="[
:param map_provider: optional map_provider of the pydeck map
:return: A pydeck map with a GeoJsonLayer map added
"""
pdk = _try_import_pydeck()

geometry_col = SedonaMapUtils.__get_geometry_col__(df)
gdf = SedonaPyDeck._prepare_df_(df, geometry_col=geometry_col)
geom_type = gdf[geometry_col][0].geom_type
Expand Down Expand Up @@ -116,6 +119,8 @@ def create_scatterplot_map(cls, df, fill_color="[255, 140, 0]", radius_col=1, ra
:param map_provider: optional map_provider to be added to the pydeck map
:return: A pydeck map object with a scatterplot layer added
"""
pdk = _try_import_pydeck()

gdf = SedonaPyDeck._prepare_df_(df, add_coords=True)
layer = pdk.Layer(
"ScatterplotLayer",
Expand Down Expand Up @@ -152,6 +157,7 @@ def create_heatmap(cls, df, color_range=None, weight=1, aggregation="SUM", initi
:param map_provider: Optional map_provider for the pydeck map
:return: A pydeck map with a heatmap layer added
"""
pdk = _try_import_pydeck()

gdf = SedonaPyDeck._prepare_df_(df, add_coords=True)

Expand Down Expand Up @@ -195,7 +201,7 @@ def _prepare_df_(cls, df, add_coords=False, geometry_col=None):
"""
if geometry_col is None:
geometry_col = SedonaMapUtils.__get_geometry_col__(df=df)
gdf = SedonaMapUtils.__convert_to_gdf__(df, rename=False, geometry_col=geometry_col)
gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df, rename=False, geometry_col=geometry_col)
if add_coords is True:
SedonaPyDeck._create_coord_column_(gdf=gdf, geometry_col=geometry_col)
return gdf
Expand Down Expand Up @@ -239,6 +245,7 @@ def _create_coord_column_(cls, gdf, geometry_col, add_points=False):

@classmethod
def _create_fat_layer_(cls, gdf, fill_color, line_color, elevation_col):
pdk = _try_import_pydeck()
layer = pdk.Layer(
'GeoJsonLayer', # `type` positional argument is here
data=gdf,
Expand All @@ -254,3 +261,14 @@ def _create_fat_layer_(cls, gdf, fill_color, line_color, elevation_col):
)

return layer


def _try_import_pydeck() -> ModuleType:
    """Import and return the optional ``pydeck`` module.

    pydeck is imported lazily so that this module can be imported even when the
    optional map dependency is not installed.

    :return: The imported ``pydeck`` module.
    :rtype: ModuleType
    :raises ImportError: If pydeck cannot be imported. The message tells the user
        which extra to install, and ``name`` is set to ``"pydeck"`` so callers can
        inspect which module was missing.
    """
    try:
        import pydeck as pdk
    except ImportError:
        msg = "Install sedona[pydeck-map] to convert sedona dataframes to pydeck maps."
        # `from None` hides the low-level import traceback; the message above is
        # the actionable part for the user.
        raise ImportError(msg, name="pydeck") from None

    return pdk
3 changes: 1 addition & 2 deletions python/sedona/raster_utils/SedonaUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@
# specific language governing permissions and limitations
# under the License.

from IPython.display import display, HTML

class SedonaUtils:
    """Utility helpers for displaying DataFrames in an IPython front end."""

    @classmethod
    def display_image(cls, df):
        """Render ``df`` as an HTML table through ``IPython.display``.

        IPython is imported inside the method, so importing this module does not
        require IPython. The frame is converted with ``toPandas()`` and rendered
        with ``escape=False`` so that markup stored in cells is emitted unescaped.

        :param df: DataFrame exposing ``toPandas()``.
        :return: None; the table is sent to the IPython display.
        """
        from IPython.display import display, HTML

        markup = df.toPandas().to_html(escape=False)
        display(HTML(markup))
2 changes: 1 addition & 1 deletion python/sedona/spark/SedonaContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def create(cls, spark: SparkSession) -> SparkSession:
"""
spark.sql("SELECT 1 as geom").count()
PackageImporter.import_jvm_lib(spark._jvm)
spark._jvm.SedonaContext.create(spark._jsparkSession)
spark._jvm.SedonaContext.create(spark._jsparkSession, "python")
return spark

@classmethod
Expand Down
14 changes: 6 additions & 8 deletions python/sedona/spark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,9 @@
from sedona.register import SedonaRegistrator
from sedona.spark.SedonaContext import SedonaContext
from sedona.raster_utils.SedonaUtils import SedonaUtils
try:
from sedona.maps.SedonaKepler import SedonaKepler
except:
print('Skipping SedonaKepler import, verify if keplergl is installed')
try:
from sedona.maps.SedonaPyDeck import SedonaPyDeck
except:
print('Skipping SedonaPyDeck import, verify if pydeck is installed')
from sedona.maps.SedonaKepler import SedonaKepler
from sedona.maps.SedonaPyDeck import SedonaPyDeck
from sedona.sql.st_aggregates import *
from sedona.sql.st_constructors import *
from sedona.sql.st_functions import *
from sedona.sql.st_predicates import *
10 changes: 5 additions & 5 deletions python/sedona/sql/st_aggregates.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import inspect
import sys

from functools import partial

Expand All @@ -23,11 +25,9 @@

_call_aggregate_function = partial(call_sedona_function, "st_aggregates")

__all__ = [
"ST_Envelope_Aggr",
"ST_Intersection_Aggr",
"ST_Union_Aggr",
]
# Automatically populate __all__ with the name of every plain function that is
# bound in this module's namespace at the moment this statement executes.
# NOTE(review): inspect.getmembers() only sees names already defined above this
# line (including imported helper functions). Functions defined further down the
# module are not listed unless this statement runs after them -- confirm that the
# placement matches the intended public API for `from ... import *`.
__all__ = [name for name, obj in inspect.getmembers(sys.modules[__name__])
if inspect.isfunction(obj)]


@validate_argument_types
Expand Down
48 changes: 21 additions & 27 deletions python/sedona/sql/st_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import inspect
import sys

from functools import partial
from typing import Optional, Union
Expand All @@ -23,25 +25,9 @@
from sedona.sql.dataframe_api import ColumnOrName, ColumnOrNameOrNumber, call_sedona_function, validate_argument_types


__all__ = [
"ST_GeomFromGeoHash",
"ST_GeomFromGeoJSON",
"ST_GeomFromGML",
"ST_GeomFromKML",
"ST_GeomFromText",
"ST_GeomFromWKB",
"ST_GeomFromWKT",
"ST_GeomFromEWKT",
"ST_LineFromText",
"ST_LineStringFromText",
"ST_Point",
"ST_PointFromText",
"ST_MakePoint"
"ST_PolygonFromEnvelope",
"ST_PolygonFromText",
"ST_MLineFromText",
"ST_MPolyFromText"
]
# Automatically populate __all__ with the name of every plain function that is
# bound in this module's namespace at the moment this statement executes.
# NOTE(review): inspect.getmembers() only sees names already defined above this
# line (including imported helper functions). Functions defined further down the
# module are not listed unless this statement runs after them -- confirm that the
# placement matches the intended public API for `from ... import *`.
__all__ = [name for name, obj in inspect.getmembers(sys.modules[__name__])
if inspect.isfunction(obj)]


_call_constructor_function = partial(call_sedona_function, "st_constructors")
Expand Down Expand Up @@ -98,7 +84,7 @@ def ST_GeomFromKML(kml_string: ColumnOrName) -> Column:


@validate_argument_types
def ST_GeomFromText(wkt: ColumnOrName) -> Column:
def ST_GeomFromText(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None) -> Column:
"""Generate a geometry column from a Well-Known Text (WKT) string column.
This is an alias of ST_GeomFromWKT.
Expand All @@ -107,7 +93,9 @@ def ST_GeomFromText(wkt: ColumnOrName) -> Column:
:return: Geometry column representing the WKT string.
:rtype: Column
"""
return _call_constructor_function("ST_GeomFromText", wkt)
args = (wkt) if srid is None else (wkt, srid)

return _call_constructor_function("ST_GeomFromText", args)


@validate_argument_types
Expand All @@ -123,7 +111,7 @@ def ST_GeomFromWKB(wkb: ColumnOrName) -> Column:


@validate_argument_types
def ST_GeomFromWKT(wkt: ColumnOrName) -> Column:
def ST_GeomFromWKT(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None) -> Column:
"""Generate a geometry column from a Well-Known Text (WKT) string column.
This is an alias of ST_GeomFromText.
Expand All @@ -132,7 +120,9 @@ def ST_GeomFromWKT(wkt: ColumnOrName) -> Column:
:return: Geometry column representing the WKT string.
:rtype: Column
"""
return _call_constructor_function("ST_GeomFromWKT", wkt)
args = (wkt) if srid is None else (wkt, srid)

return _call_constructor_function("ST_GeomFromWKT", args)

@validate_argument_types
def ST_GeomFromEWKT(ewkt: ColumnOrName) -> Column:
Expand Down Expand Up @@ -275,23 +265,27 @@ def ST_PolygonFromText(coords: ColumnOrName, delimiter: ColumnOrName) -> Column:
return _call_constructor_function("ST_PolygonFromText", (coords, delimiter))

@validate_argument_types
def ST_MPolyFromText(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None) -> Column:
    """Generate multiPolygon geometry from a multiPolygon WKT representation.

    :param wkt: multiPolygon WKT string column to generate from.
    :type wkt: ColumnOrName
    :param srid: Optional SRID passed as a second argument to ST_MPolyFromText;
        left out of the call entirely when None.
    :type srid: Optional[ColumnOrNameOrNumber]
    :return: multiPolygon geometry generated from the wkt column.
    :rtype: Column
    """
    # Forward srid only when the caller supplied one, so the one-argument form
    # of the function is still what gets called by default.
    args = wkt if srid is None else (wkt, srid)

    return _call_constructor_function("ST_MPolyFromText", args)

@validate_argument_types
def ST_MLineFromText(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None) -> Column:
    """Generate multiLineString geometry from a multiLineString WKT representation.

    :param wkt: multiLineString WKT string column to generate from.
    :type wkt: ColumnOrName
    :param srid: Optional SRID passed as a second argument to ST_MLineFromText;
        left out of the call entirely when None.
    :type srid: Optional[ColumnOrNameOrNumber]
    :return: multiLineString geometry generated from the wkt column.
    :rtype: Column
    """
    # Forward srid only when the caller supplied one, so the one-argument form
    # of the function is still what gets called by default.
    args = wkt if srid is None else (wkt, srid)

    return _call_constructor_function("ST_MLineFromText", args)
Loading

0 comments on commit 819cd69

Please sign in to comment.