Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function to split data on an expanding window #238

Merged
merged 4 commits into from
Mar 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ Coordinate Manipulation
inside
block_split
rolling_window
expanding_window

Utilities
---------
Expand Down
1 change: 1 addition & 0 deletions verde/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
inside,
block_split,
rolling_window,
expanding_window,
profile_coordinates,
get_region,
pad_region,
Expand Down
24 changes: 22 additions & 2 deletions verde/base/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,21 @@ def check_data(data):
return data


def check_coordinates(coordinates):
"""
Check that the given coordinate arrays are what we expect them to be.
Should be a tuple with arrays of the same shape.
"""
shapes = [coord.shape for coord in coordinates]
if not all(shape == shapes[0] for shape in shapes):
raise ValueError(
"Coordinate arrays must have the same shape. Coordinate shapes: {}".format(
shapes
)
)
return coordinates


def check_fit_input(coordinates, data, weights, unpack=True):
"""
Validate the inputs to the fit method of gridders.
Expand Down Expand Up @@ -59,8 +74,13 @@ def check_fit_input(coordinates, data, weights, unpack=True):
"""
data = check_data(data)
weights = check_data(weights)
if any(i.shape != j.shape for i in coordinates for j in data):
raise ValueError("Coordinate and data arrays must have the same shape.")
coordinates = check_coordinates(coordinates)
if any(i.shape != coordinates[0].shape for i in data):
raise ValueError(
"Data arrays must have the same shape {} as coordinates. Data shapes: {}.".format(
coordinates[0].shape, [i.shape for i in data]
)
)
if any(w is not None for w in weights):
if len(weights) != len(data):
raise ValueError(
Expand Down
135 changes: 126 additions & 9 deletions verde/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
from sklearn.utils import check_random_state

from .base.utils import n_1d_arrays
from .base.utils import n_1d_arrays, check_coordinates
from .utils import kdtree


Expand Down Expand Up @@ -738,6 +738,7 @@ def block_split(coordinates, spacing=None, adjust="spacing", region=None, shape=
--------
BlockReduce : Apply a reduction operation to the data in blocks (windows).
rolling_window : Select points on a rolling (moving) window.
expanding_window : Select points on windows of changing size.

Examples
--------
Expand Down Expand Up @@ -771,6 +772,7 @@ def block_split(coordinates, spacing=None, adjust="spacing", region=None, shape=
[6 6 6 7 7 7]]

"""
coordinates = check_coordinates(coordinates)
if region is None:
region = get_region(coordinates)
block_coords = grid_coordinates(
Expand Down Expand Up @@ -835,6 +837,7 @@ def rolling_window(
See also
--------
block_split : Split a region into blocks and label points accordingly.
expanding_window : Select points on windows of changing size.

Examples
--------
Expand Down Expand Up @@ -943,13 +946,7 @@ def rolling_window(
[6. 6. 6. 7. 7. 7. 8. 8. 8.]

"""
shapes = [coord.shape for coord in coordinates]
if not all(shape == shapes[0] for shape in shapes):
raise ValueError(
"Coordinate arrays must have the same shape. Given shapes: {}".format(
shapes
)
)
coordinates = check_coordinates(coordinates)
if region is None:
region = get_region(coordinates)
# Calculate the region spanning the centers of the rolling windows
Expand Down Expand Up @@ -981,7 +978,8 @@ def rolling_window(
# like empty lists but can handle empty integer arrays in case a window has
# no points inside it.
indices.ravel()[:] = [
np.unravel_index(np.array(i, dtype="int"), shape=shapes[0]) for i in indices1d
np.unravel_index(np.array(i, dtype="int"), shape=coordinates[0].shape)
for i in indices1d
]
return centers, indices

Expand All @@ -1005,6 +1003,125 @@ def _check_rolling_window_overlap(region, size, shape, spacing):
)


def expanding_window(coordinates, center, sizes):
"""
Select points on windows of changing size around a center point.

Returns the indices of points falling inside each window.

Parameters
----------
coordinates : tuple of arrays
Arrays with the coordinates of each data point. Should be in the
following order: (easting, northing, vertical, ...).
center : tuple
The coordinates of the center of the window. Should be in the
following order: (easting, northing, vertical, ...).
sizes : array
The sizes of the windows. Does not have to be in any particular order.
The order of indices returned will match the order of window sizes
given. Units should match the units of *coordinates* and *center*.

Returns
-------
indices : list
Each element of the list corresponds to the indices of points falling
inside a window. Use them to index the coordinates for each window. The
indices will depend on the number of dimensions in the input
coordinates. For example, if the coordinates are 2D arrays, each window
will contain indices for 2 dimensions (row, column).

See also
--------
block_split : Split a region into blocks and label points accordingly.
rolling_window : Select points on a rolling (moving) window.

Examples
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The example looks great. Easy to follow and understand how the expanding window works on 1D and 2D data

--------

Generate a set of sample coordinates on a grid and determine the indices
of points for each expanding window:

>>> from verde import grid_coordinates
>>> coords = grid_coordinates((-5, -1, 6, 10), spacing=1)
>>> print(coords[0])
[[-5. -4. -3. -2. -1.]
[-5. -4. -3. -2. -1.]
[-5. -4. -3. -2. -1.]
[-5. -4. -3. -2. -1.]
[-5. -4. -3. -2. -1.]]
>>> print(coords[1])
[[ 6. 6. 6. 6. 6.]
[ 7. 7. 7. 7. 7.]
[ 8. 8. 8. 8. 8.]
[ 9. 9. 9. 9. 9.]
[10. 10. 10. 10. 10.]]
>>> # Get the expanding window indices
>>> indices = expanding_window(coords, center=(-3, 8), sizes=[1, 2, 4])
>>> # There is one index per window
>>> print(len(indices))
3
>>> # The points in the first window. Indices are 2D positions because the
>>> # coordinate arrays are 2D.
>>> print(len(indices[0]))
2
>>> for dimension in indices[0]:
... print(dimension)
[2]
[2]
>>> for dimension in indices[1]:
... print(dimension)
[1 1 1 2 2 2 3 3 3]
[1 2 3 1 2 3 1 2 3]
>>> for dimension in indices[2]:
... print(dimension)
[0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4]
[0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4]
>>> # To get the coordinates for each window, use indexing
>>> print(coords[0][indices[0]])
[-3.]
>>> print(coords[1][indices[0]])
[8.]
>>> print(coords[0][indices[1]])
[-4. -3. -2. -4. -3. -2. -4. -3. -2.]
>>> print(coords[1][indices[1]])
[7. 7. 7. 8. 8. 8. 9. 9. 9.]

If the coordinates are 1D, the indices will also be 1D:

>>> coords1d = [coord.ravel() for coord in coords]
>>> indices = expanding_window(coords1d, center=(-3, 8), sizes=[1, 2, 4])
>>> print(len(indices))
3
>>> # Since coordinates are 1D, there is only one index
>>> print(len(indices[0]))
1
>>> print(indices[0][0])
[12]
>>> print(indices[1][0])
[ 6 7 8 11 12 13 16 17 18]
>>> # The returned indices can be used in the same way as before
>>> print(coords1d[0][indices[0]])
[-3.]
>>> print(coords1d[1][indices[0]])
[8.]

"""
coordinates = check_coordinates(coordinates)
shape = coordinates[0].shape
center = np.atleast_2d(center)
# pykdtree doesn't support query_ball_point yet and we need that
tree = kdtree(coordinates, use_pykdtree=False)
indices = []
for size in sizes:
# Use p=inf (infinity norm) to get square windows instead of circular
index1d = tree.query_ball_point(center, r=size / 2, p=np.inf)[0]
# Convert indices to an array to avoid errors when the index is empty
# (no points in the window). unravel_index doesn't like empty lists.
indices.append(np.unravel_index(np.array(index1d, dtype="int"), shape=shape))
return indices


def longitude_continuity(coordinates, region):
"""
Modify coordinates and region boundaries to ensure longitude continuity.
Expand Down
16 changes: 15 additions & 1 deletion verde/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy.testing as npt
import pytest

from ..base.utils import check_fit_input
from ..base.utils import check_fit_input, check_coordinates
from ..base.base_classes import (
BaseGridder,
get_dims,
Expand All @@ -16,6 +16,20 @@
from ..coordinates import grid_coordinates, scatter_points


def test_check_coordinates():
"Should raise a ValueError is the coordinates have different shapes."
# Should not raise an error
check_coordinates([np.arange(10), np.arange(10)])
check_coordinates([np.arange(10).reshape((5, 2)), np.arange(10).reshape((5, 2))])
# Should raise an error
with pytest.raises(ValueError):
check_coordinates([np.arange(10), np.arange(10).reshape((5, 2))])
with pytest.raises(ValueError):
check_coordinates(
[np.arange(10).reshape((2, 5)), np.arange(10).reshape((5, 2))]
)


def test_get_dims():
"Tests that get_dims returns the expected results"
assert get_dims(dims=None) == ("northing", "easting")
Expand Down