Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 89 additions & 49 deletions python/sedona/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@
from sedona.geopandas.geodataframe import GeoDataFrame
from sedona.geopandas.geoindex import GeoIndex

from pyspark.pandas.internal import SPARK_DEFAULT_INDEX_NAME # __index_level_0__
from pyspark.pandas.internal import (
SPARK_DEFAULT_INDEX_NAME, # __index_level_0__
NATURAL_ORDER_COLUMN_NAME,
)


class GeoSeries(GeoFrame, pspd.Series):
Expand Down Expand Up @@ -158,14 +161,7 @@ def __init__(
gs.apply(lambda geom: geom.wkb if geom is not None else None)
)
# initialize the parent class pyspark Series with the pandas Series
super().__init__(
data=pdf,
index=index,
dtype=dtype,
name=name,
copy=copy,
fastpath=fastpath,
)
super().__init__(data=pdf)
Comment thread
zhangfengcdt marked this conversation as resolved.

if crs:
self.set_crs(crs, inplace=True)
Expand Down Expand Up @@ -904,7 +900,7 @@ def intersects(
An object is said to intersect `other` if its `boundary` and `interior`
intersects in any way with those of the other.

The operation works on a 1-to-1 row-wise manner:
The operation works on a 1-to-1 row-wise manner.

Parameters
----------
Expand All @@ -913,7 +909,7 @@ def intersects(
intersected.
align : bool | None (default None)
If True, automatically aligns GeoSeries based on their indices. None defaults to True.
If False, the order of elements is preserved. (not supported in Sedona Geopandas)
If False, the order of elements is preserved.

Returns
-------
Expand All @@ -935,23 +931,26 @@ def intersects(
... LineString([(1, 0), (1, 3)]),
... LineString([(2, 0), (0, 2)]),
... Point(1, 1),
... Point(-100, -100),
... Point(0, 1),
... ],
... index=range(1, 5),
... )

We can check two GeoSeries against each other, row by row.
The GeoSeries above have different indices. We align both GeoSeries
based on index values and compare elements with the same index:
>>> s
0 POLYGON ((0 0, 2 2, 0 2, 0 0))
1 LINESTRING (0 0, 2 2)
2 LINESTRING (2 0, 0 2)
3 POINT (0 1)
dtype: geometry

>>> s.intersects(s2)
0 True
1 True
2 True
3 False
dtype: bool
>>> s2
1 LINESTRING (1 0, 1 3)
2 LINESTRING (2 0, 0 2)
3 POINT (1 1)
4 POINT (0 1)
dtype: geometry

We can also check if each geometry of GeoSeries intersects a single
We can check if each geometry of GeoSeries intersects a single
geometry:

>>> line = LineString([(-1, 1), (3, 1)])
Expand All @@ -962,6 +961,27 @@ def intersects(
3 True
dtype: bool

We can also check two GeoSeries against each other, row by row.
The GeoSeries above have different indices. We can either align both GeoSeries
based on index values and compare elements with the same index using
``align=True`` or ignore index and compare elements based on their matching
order using ``align=False``:

>>> s.intersects(s2, align=True)
0 False
1 True
2 True
3 False
4 False
dtype: bool

>>> s.intersects(s2, align=False)
0 True
1 True
2 True
3 True
dtype: bool

Notes
-----
This method works in a row-wise manner. It does not check if an element
Expand All @@ -988,7 +1008,7 @@ def intersection(
"""Returns a ``GeoSeries`` of the intersection of points in each
aligned geometry with `other`.

The operation works on a 1-to-1 row-wise manner:
The operation works on a 1-to-1 row-wise manner.

Parameters
----------
Expand All @@ -997,7 +1017,7 @@ def intersection(
intersection with.
align : bool | None (default None)
If True, automatically aligns GeoSeries based on their indices. None defaults to True.
If False, the order of elements is preserved. (not supported in Sedona Geopandas)
If False, the order of elements is preserved.

Returns
-------
Expand All @@ -1021,43 +1041,62 @@ def intersection(
... LineString([(1, 0), (1, 3)]),
... LineString([(2, 0), (0, 2)]),
... Point(1, 1),
... Point(-100, -100),
... Point(0, 1),
... ],
... index=range(1, 6),
... )

We can do an intersection of each geometry and a single
shapely geometry:
>>> s
0 POLYGON ((0 0, 2 2, 0 2, 0 0))
1 POLYGON ((0 0, 2 2, 0 2, 0 0))
2 LINESTRING (0 0, 2 2)
3 LINESTRING (2 0, 0 2)
4 POINT (0 1)
dtype: geometry

>>> geom = Polygon([(-0.5, -0.5), (-0.5, 2.5), (2.5, 2.5), (2.5, -0.5), (-0.5, -0.5)])
>>> s.intersection(geom)
Polygon([(0, 0), (2, 2), (0, 2)]),
Polygon([(0, 0), (2, 2), (0, 2)]),
LineString([(0, 0), (2, 2)]),
LineString([(2, 0), (0, 2)]),
Point(0, 1),
>>> s2
1 POLYGON ((0 0, 1 1, 0 1, 0 0))
2 LINESTRING (1 0, 1 3)
3 LINESTRING (2 0, 0 2)
4 POINT (1 1)
5 POINT (0 1)
dtype: geometry

>>> geom = Polygon([(-0.5, -0.5), (-0.5, 2.5), (2.5, 2.5), (2.5, -0.5), (-0.5, -0.5)])
We can also do intersection of each geometry and a single
shapely geometry:

>>> s.intersection(Polygon([(0, 0), (1, 1), (0, 1)]))
0 POLYGON ((0 0, 2 2, 0 2))
1 POLYGON ((0 0, 2 2, 0 2))
2 LINESTRING (0 0, 2 2)
3 LINESTRING (2 0, 0 2)
0 POLYGON ((0 0, 0 1, 1 1, 0 0))
1 POLYGON ((0 0, 0 1, 1 1, 0 0))
2 LINESTRING (0 0, 1 1)
3 POINT (1 1)
4 POINT (0 1)
dtype: geometry

We can also check two GeoSeries against each other, row by row.
The GeoSeries above have different indices. We align both GeoSeries
based on index values and compare elements with the same index.
The GeoSeries above have different indices. We can either align both GeoSeries
based on index values and compare elements with the same index using
``align=True`` or ignore index and compare elements based on their matching
order using ``align=False``:

>>> s.intersection(s2, align=True)
0 None
1 POLYGON ((0 0, 0 1, 1 1, 0 0))
2 POINT (1 1)
3 LINESTRING (2 0, 0 2)
4 POINT EMPTY
5 None
dtype: geometry

>>> s.intersection(s2)
0 POLYGON ((0 0, 1 1, 0 1, 0 0))
>>> s.intersection(s2, align=False)
0 POLYGON ((0 0, 0 1, 1 1, 0 0))
1 LINESTRING (1 1, 1 2)
2 POINT (1 1)
3 POINT (1 1)
4 POLYGON EMPTY
4 POINT (0 1)
dtype: geometry


See Also
--------
GeoSeries.difference
Expand All @@ -1082,8 +1121,9 @@ def _row_wise_operation(
from pyspark.sql.functions import col

# Note: this is specifically False. None is valid since it defaults to True similar to geopandas
if align is False:
raise NotImplementedError("Sedona Geopandas does not support align=False")
index_col = (
NATURAL_ORDER_COLUMN_NAME if align is False else SPARK_DEFAULT_INDEX_NAME
)

if isinstance(other, BaseGeometry):
other = GeoSeries([other] * len(self))
Expand All @@ -1093,13 +1133,13 @@ def _row_wise_operation(
# TODO: this does not yet support multi-index
df = self._internal.spark_frame.select(
col(self.get_first_geometry_column()).alias("L"),
col(SPARK_DEFAULT_INDEX_NAME),
col(index_col),
)
other_df = other._internal.spark_frame.select(
col(other.get_first_geometry_column()).alias("R"),
col(SPARK_DEFAULT_INDEX_NAME),
col(index_col),
)
joined_df = df.join(other_df, on=SPARK_DEFAULT_INDEX_NAME, how="outer")
joined_df = df.join(other_df, on=index_col, how="outer")
return self._query_geometry_column(
select,
cols=["L", "R"],
Expand Down
68 changes: 65 additions & 3 deletions python/tests/geopandas/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ def check_sgpd_equals_gpd(self, actual: sgpd.GeoSeries, expected: gpd.GeoSeries)
assert len(actual) == len(expected)
sgpd_result = actual.to_geopandas()
for a, e in zip(sgpd_result, expected):
if a.is_empty and e.is_empty:
if a is None or e is None:
assert a is None and e is None
continue
elif a.is_empty and e.is_empty:
continue
self.assert_geometry_almost_equal(a, e)

Expand Down Expand Up @@ -377,6 +380,23 @@ def test_intersects(self):
expected = pd.Series([True, True, True, True])
assert_series_equal(result.to_pandas(), expected)

# from the original doc string
s2 = sgpd.GeoSeries(
[
LineString([(1, 0), (1, 3)]),
LineString([(2, 0), (0, 2)]),
Point(1, 1),
Point(0, 1),
],
index=range(1, 5),
)

result = s.intersects(s2, align=True)
expected = pd.Series([False, True, True, False, False])

result = s.intersects(s2, align=False)
expected = pd.Series([True, True, True, True])

def test_intersection(self):
s = sgpd.GeoSeries(
[
Expand Down Expand Up @@ -424,8 +444,50 @@ def test_intersection(self):
)
self.check_sgpd_equals_gpd(result, expected)

with pytest.raises(NotImplementedError):
s.intersection(s2, align=False)
# from the original doc string
s = sgpd.GeoSeries(
[
Polygon([(0, 0), (2, 2), (0, 2)]),
Polygon([(0, 0), (2, 2), (0, 2)]),
LineString([(0, 0), (2, 2)]),
LineString([(2, 0), (0, 2)]),
Point(0, 1),
],
)
s2 = sgpd.GeoSeries(
[
Polygon([(0, 0), (1, 1), (0, 1)]),
LineString([(1, 0), (1, 3)]),
LineString([(2, 0), (0, 2)]),
Point(1, 1),
Point(0, 1),
],
index=range(1, 6),
)
result = s.intersection(s2, align=True)
expected = gpd.GeoSeries(
[
None,
Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
Point(1, 1),
LineString([(2, 0), (0, 2)]),
Point(),
None,
]
)
self.check_sgpd_equals_gpd(result, expected)

result = s.intersection(s2, align=False)
expected = gpd.GeoSeries(
[
Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
LineString([(1, 1), (1, 2)]),
Point(1, 1),
Point(1, 1),
Point(0, 1),
]
)
self.check_sgpd_equals_gpd(result, expected)

def test_intersection_all(self):
pass
Expand Down
30 changes: 29 additions & 1 deletion python/tests/geopandas/test_match_geopandas_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,15 @@ def test_intersects(self):
gpd_result = gpd.GeoSeries(geom).intersects(gpd.GeoSeries(geom2))
self.check_pd_series_equal(sgpd_result, gpd_result)

if len(geom) == len(geom2):
sgpd_result = GeoSeries(geom).intersects(
GeoSeries(geom2), align=False
)
gpd_result = gpd.GeoSeries(geom).intersects(
gpd.GeoSeries(geom2), align=False
)
self.check_pd_series_equal(sgpd_result, gpd_result)

def test_intersection(self):
geometries = [
Polygon([(0, 0), (1, 0), (1, 1)]),
Expand All @@ -475,6 +484,22 @@ def test_intersection(self):
gpd_result = gpd.GeoSeries(g1).intersection(gpd.GeoSeries(g2))
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)

# Ensure both align True and False work correctly
for _, g1 in self.geoms:
for _, g2 in self.geoms:
gpd_series1, gpd_series2 = gpd.GeoSeries(g1), gpd.GeoSeries(g2)
# The original geopandas intersection method fails on invalid geometries
if not gpd_series1.is_valid.all() or not gpd_series2.is_valid.all():
continue
sgpd_result = GeoSeries(g1).intersection(GeoSeries(g2))
gpd_result = gpd_series1.intersection(gpd_series2)
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)

if len(g1) == len(g2):
sgpd_result = GeoSeries(g1).intersects(GeoSeries(g2), align=False)
gpd_result = gpd_series1.intersects(gpd_series2, align=False)
self.check_pd_series_equal(sgpd_result, gpd_result)

def test_intersection_all(self):
pass

Expand Down Expand Up @@ -518,8 +543,11 @@ def check_sgpd_equals_gpd(self, actual: GeoSeries, expected: gpd.GeoSeries):
assert isinstance(expected, gpd.GeoSeries)
sgpd_result = actual.to_geopandas()
for a, e in zip(sgpd_result, expected):
if a is None or e is None:
assert a is None and e is None
continue
# Sometimes sedona and geopandas both return empty geometries but of different types (e.g. Point and Polygon)
if a.is_empty and e.is_empty:
elif a.is_empty and e.is_empty:
continue
self.assert_geometry_almost_equal(
a, e, tolerance=1e-2
Expand Down
Loading