-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ENH: Implement Morton distance (#90)
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com> Co-authored-by: Brendan Ward <bcward@astutespruce.com> Co-authored-by: Martin Fleischmann <martin@martinfleischmann.net>
- Loading branch information
1 parent
99fa1a8
commit bcbdfd1
Showing
7 changed files
with
195 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,3 +14,6 @@ dependencies: | |
- pytest | ||
- pytest-cov | ||
- hilbertcurve | ||
- pip | ||
- pip: | ||
- pymorton |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,3 +16,6 @@ dependencies: | |
- hilbertcurve | ||
# optional dependencies | ||
- pyarrow | ||
- pip | ||
- pip: | ||
- pymorton |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,3 +16,6 @@ dependencies: | |
- hilbertcurve | ||
# optional dependencies | ||
- pyarrow | ||
- pip | ||
- pip: | ||
- pymorton |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import pandas as pd | ||
from dask_geopandas.hilbert_distance import _continuous_to_discrete_coords | ||
|
||
|
||
def _morton_distance(gdf, total_bounds, p): | ||
""" | ||
Calculate distance of geometries along Morton curve | ||
The Morton curve is also known as Z-order https://en.wikipedia.org/wiki/Z-order | ||
Parameters | ||
---------- | ||
gdf : GeoDataFrame | ||
total_bounds : array_like | ||
array containing xmin, ymin, xmax, ymax | ||
p : int | ||
precision of the Morton curve | ||
Returns | ||
------- | ||
type : pandas.Series | ||
Series containing distances from Morton curve | ||
""" | ||
|
||
# Calculate bounds as numpy array | ||
bounds = gdf.bounds.to_numpy() | ||
# Calculate discrete coords based on total bounds and bounds | ||
coords = _continuous_to_discrete_coords(total_bounds, bounds, p) | ||
# Calculate distance from morton curve | ||
distances = _distances_from_coordinates(coords) | ||
|
||
return pd.Series(distances, index=gdf.index, name="morton_distance") | ||
|
||
|
||
def _distances_from_coordinates(coords): | ||
""" | ||
Calculate distances from geometry mid-points along Morton curve | ||
Parameters | ||
---------- | ||
coords : array_like | ||
x, y coordinate pairs based on mid-points of geoms | ||
Returns | ||
------- | ||
type : int | ||
Integer distances from Morton curve | ||
""" | ||
|
||
return _part1by1(coords[:, 0]) | (_part1by1(coords[:, 1]) << 1) | ||
|
||
|
||
def _part1by1(n): | ||
""" | ||
Interleave bits by ninary magic numbers | ||
Based on #http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN | ||
Parameters | ||
---------- | ||
n : np.array | ||
X or Y coordinates | ||
Returns | ||
------- | ||
n : int | ||
Interleaved bits | ||
""" | ||
n &= 0x0000FFFF | ||
n = (n | (n << 8)) & 0x00FF00FF | ||
n = (n | (n << 4)) & 0x0F0F0F0F | ||
n = (n | (n << 2)) & 0x33333333 | ||
n = (n | (n << 1)) & 0x55555555 | ||
|
||
return n |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import pytest | ||
import pandas as pd | ||
from pandas.testing import assert_index_equal | ||
from pymorton import interleave # https://github.com/trevorprater/pymorton | ||
from dask_geopandas.hilbert_distance import _continuous_to_discrete_coords | ||
from dask_geopandas import from_geopandas | ||
import geopandas | ||
from shapely.geometry import Point, LineString, Polygon | ||
|
||
|
||
@pytest.fixture | ||
def geoseries_points(): | ||
p1 = Point(1, 2) | ||
p2 = Point(2, 3) | ||
p3 = Point(3, 4) | ||
p4 = Point(4, 1) | ||
return geopandas.GeoSeries([p1, p2, p3, p4]) | ||
|
||
|
||
@pytest.fixture | ||
def geoseries_lines(): | ||
l1 = LineString([(0, 0), (0, 1), (1, 1)]) | ||
l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)]) | ||
return geopandas.GeoSeries([l1, l2] * 2) | ||
|
||
|
||
@pytest.fixture() | ||
def geoseries_polygons(): | ||
t1 = Polygon([(0, 3.5), (7, 2.4), (1, 0.1)]) | ||
t2 = Polygon([(0, 0), (1, 1), (0, 1)]) | ||
sq1 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) | ||
sq2 = Polygon([(0, 0), (1, 0), (1, 2), (0, 2)]) | ||
return geopandas.GeoSeries([t1, t2, sq1, sq2]) | ||
|
||
|
||
def morton_distance_dask(geoseries): | ||
|
||
bounds = geoseries.bounds.to_numpy() | ||
total_bounds = geoseries.total_bounds | ||
coords = _continuous_to_discrete_coords(total_bounds, bounds, p=15) | ||
|
||
ddf = from_geopandas(geoseries, npartitions=1) | ||
result = ddf.morton_distance().compute() | ||
|
||
expected = [] | ||
|
||
for i in range(len(coords)): | ||
x = int(coords[i][0]) | ||
y = int(coords[i][1]) | ||
expected.append(interleave(x, y)) | ||
|
||
assert list(result) == expected | ||
assert isinstance(result, pd.Series) | ||
assert_index_equal(ddf.index.compute(), result.index) | ||
|
||
|
||
def test_morton_distance_points(geoseries_points): | ||
morton_distance_dask(geoseries_points) | ||
|
||
|
||
def test_morton_distance_lines(geoseries_lines): | ||
morton_distance_dask(geoseries_lines) | ||
|
||
|
||
def test_morton_distance_polygons(geoseries_polygons): | ||
morton_distance_dask(geoseries_polygons) |