Skip to content

Commit

Permalink
ENH: Implement Morton distance (#90)
Browse files Browse the repository at this point in the history
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Co-authored-by: Brendan Ward <bcward@astutespruce.com>
Co-authored-by: Martin Fleischmann <martin@martinfleischmann.net>
  • Loading branch information
4 people committed Aug 19, 2021
1 parent 99fa1a8 commit bcbdfd1
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 0 deletions.
3 changes: 3 additions & 0 deletions continuous_integration/envs/37-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ dependencies:
- pytest
- pytest-cov
- hilbertcurve
- pip
- pip:
- pymorton
3 changes: 3 additions & 0 deletions continuous_integration/envs/38-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ dependencies:
- hilbertcurve
# optional dependencies
- pyarrow
- pip
- pip:
- pymorton
1 change: 1 addition & 0 deletions continuous_integration/envs/39-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies:
- pyarrow
- pip
- pip:
- pymorton
- git+https://github.com/geopandas/geopandas.git@master
- git+https://github.com/pygeos/pygeos.git@master
- git+https://github.com/dask/dask.git@main
3 changes: 3 additions & 0 deletions continuous_integration/envs/39-latest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ dependencies:
- hilbertcurve
# optional dependencies
- pyarrow
- pip
- pip:
- pymorton
43 changes: 43 additions & 0 deletions dask_geopandas/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from shapely.geometry.base import BaseGeometry
from shapely.geometry import box
from .hilbert_distance import _hilbert_distance
from .morton_distance import _morton_distance


def _set_crs(df, crs, allow_override):
Expand Down Expand Up @@ -343,6 +344,48 @@ def hilbert_distance(self, p=15):

return distances

def morton_distance(self, p=15):

"""
Calculate the distance of geometries along the Morton curve
The Morton curve is also known as Z-order https://en.wikipedia.org/wiki/Z-order.
The Morton distance can be used to spatially partition Dask-GeoPandas objects,
by mapping two-dimensional geometries along the Morton space-filing curve.
Each geometry is represented by the midpoint of its bounds and linked to the
Morton curve. The function returns a distance from the beginning
of the curve to the linked point.
Morton distance is more performant than ``hilbert_distance`` but can result in
less optimal partitioning.
Parameters
----------
p : int
precision of the Morton curve
Returns
----------
type : dask.Series
Series containing distances along the Morton curve
"""

# Compute total bounds of all partitions rather than each partition
total_bounds = self.total_bounds

# Calculate Morton distances for each partition
distances = self.map_partitions(
_morton_distance,
total_bounds=total_bounds,
p=p,
meta=pd.Series([], name="morton_distance", dtype="int"),
)

return distances


class GeoSeries(_Frame, dd.core.Series):
_partition_type = geopandas.GeoSeries
Expand Down
76 changes: 76 additions & 0 deletions dask_geopandas/morton_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pandas as pd
from dask_geopandas.hilbert_distance import _continuous_to_discrete_coords


def _morton_distance(gdf, total_bounds, p):
"""
Calculate distance of geometries along Morton curve
The Morton curve is also known as Z-order https://en.wikipedia.org/wiki/Z-order
Parameters
----------
gdf : GeoDataFrame
total_bounds : array_like
array containing xmin, ymin, xmax, ymax
p : int
precision of the Morton curve
Returns
-------
type : pandas.Series
Series containing distances from Morton curve
"""

# Calculate bounds as numpy array
bounds = gdf.bounds.to_numpy()
# Calculate discrete coords based on total bounds and bounds
coords = _continuous_to_discrete_coords(total_bounds, bounds, p)
# Calculate distance from morton curve
distances = _distances_from_coordinates(coords)

return pd.Series(distances, index=gdf.index, name="morton_distance")


def _distances_from_coordinates(coords):
"""
Calculate distances from geometry mid-points along Morton curve
Parameters
----------
coords : array_like
x, y coordinate pairs based on mid-points of geoms
Returns
-------
type : int
Integer distances from Morton curve
"""

return _part1by1(coords[:, 0]) | (_part1by1(coords[:, 1]) << 1)


def _part1by1(n):
"""
Interleave bits by ninary magic numbers
Based on #http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
Parameters
----------
n : np.array
X or Y coordinates
Returns
-------
n : int
Interleaved bits
"""
n &= 0x0000FFFF
n = (n | (n << 8)) & 0x00FF00FF
n = (n | (n << 4)) & 0x0F0F0F0F
n = (n | (n << 2)) & 0x33333333
n = (n | (n << 1)) & 0x55555555

return n
66 changes: 66 additions & 0 deletions tests/test_morton_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import pytest
import pandas as pd
from pandas.testing import assert_index_equal
from pymorton import interleave # https://github.com/trevorprater/pymorton
from dask_geopandas.hilbert_distance import _continuous_to_discrete_coords
from dask_geopandas import from_geopandas
import geopandas
from shapely.geometry import Point, LineString, Polygon


@pytest.fixture
def geoseries_points():
p1 = Point(1, 2)
p2 = Point(2, 3)
p3 = Point(3, 4)
p4 = Point(4, 1)
return geopandas.GeoSeries([p1, p2, p3, p4])


@pytest.fixture
def geoseries_lines():
l1 = LineString([(0, 0), (0, 1), (1, 1)])
l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)])
return geopandas.GeoSeries([l1, l2] * 2)


@pytest.fixture()
def geoseries_polygons():
t1 = Polygon([(0, 3.5), (7, 2.4), (1, 0.1)])
t2 = Polygon([(0, 0), (1, 1), (0, 1)])
sq1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
sq2 = Polygon([(0, 0), (1, 0), (1, 2), (0, 2)])
return geopandas.GeoSeries([t1, t2, sq1, sq2])


def morton_distance_dask(geoseries):

bounds = geoseries.bounds.to_numpy()
total_bounds = geoseries.total_bounds
coords = _continuous_to_discrete_coords(total_bounds, bounds, p=15)

ddf = from_geopandas(geoseries, npartitions=1)
result = ddf.morton_distance().compute()

expected = []

for i in range(len(coords)):
x = int(coords[i][0])
y = int(coords[i][1])
expected.append(interleave(x, y))

assert list(result) == expected
assert isinstance(result, pd.Series)
assert_index_equal(ddf.index.compute(), result.index)


def test_morton_distance_points(geoseries_points):
morton_distance_dask(geoseries_points)


def test_morton_distance_lines(geoseries_lines):
morton_distance_dask(geoseries_lines)


def test_morton_distance_polygons(geoseries_polygons):
morton_distance_dask(geoseries_polygons)

0 comments on commit bcbdfd1

Please sign in to comment.