Skip to content

Commit

Permalink
Merge pull request #20 from csiro-coasts/mask-buffer
Browse files Browse the repository at this point in the history
Make clip buffer configurable
  • Loading branch information
mx-moth committed Sep 29, 2022
2 parents a8990e6 + f11d8b0 commit 55a014d
Show file tree
Hide file tree
Showing 9 changed files with 301 additions and 45 deletions.
68 changes: 53 additions & 15 deletions src/emsarray/formats/_base.py
Expand Up @@ -1052,7 +1052,12 @@ def make_geojson_geometry(self) -> geojson.FeatureCollection:
])

@abc.abstractmethod
def make_clip_mask(self, clip_geometry: BaseGeometry) -> xr.Dataset:
def make_clip_mask(
self,
clip_geometry: BaseGeometry,
*,
buffer: int = 0,
) -> xr.Dataset:
"""
Make a new Dataset that can be used to clip this dataset to only the
cells that intersect some geometry.
Expand All @@ -1066,9 +1071,14 @@ def make_clip_mask(self, clip_geometry: BaseGeometry) -> xr.Dataset:
Parameters
----------
clip_geometry
clip_geometry : BaseGeometry
The desired area to cut out. This can be any shapely geometry type,
but will most likely be a polygon
buffer : int, optional
If set to a positive integer,
a buffer of that many cells will be added around the clip region.
This is useful if you need to clip to a particular area,
but also would like to do some interpolation on the output cells.
Returns
-------
Expand Down Expand Up @@ -1100,28 +1110,56 @@ def apply_clip_mask(self, clip_mask: xr.Dataset, work_dir: Pathish) -> xr.Datase
or save the dataset to disk somewhere outside of the working directory
before the working directory is cleaned up.
:param clip_mask: The mask, as made by :meth:`make_clip_mask`.
:param work_dir: A directory where temporary files can be written to.
Callers must create and manage this temporary directory, perhaps
using :obj:`tempfile.TemporaryDirectory`.
:returns: A new :class:`~xarray.Dataset` clipped using the mask
Parameters
----------
clip_mask : xarray.Dataset
The mask, as made by :meth:`make_clip_mask`.
work_dir : str or pathlib.Path
A directory where temporary files can be written to.
Callers must create and manage this temporary directory,
perhaps using :obj:`tempfile.TemporaryDirectory`.
Returns
-------
xarray.Dataset
A new :class:`~xarray.Dataset` clipped using the mask
"""

def clip(self, clip_geomery: BaseGeometry, work_dir: Pathish) -> xr.Dataset:
def clip(
self,
clip_geomery: BaseGeometry,
work_dir: Pathish,
*,
buffer: int = 0,
) -> xr.Dataset:
"""
Generates a clip mask and applies it in one step.
See the documentation for :meth:`.make_clip_mask` and
:meth:`.apply_clip_mask` for more details.
:param clip_geomery: The desired area to cut out. This can be any
shapely geometry type, but will most likely be a polygon
:param work_dir: A directory where temporary files can be written to.
Callers must create and manage this temporary directory, perhaps
using :obj:`tempfile.TemporaryDirectory`.
:returns: A new :class:`~xarray.Dataset` clipped using the mask
Parameters
----------
clip_geometry : BaseGeometry
The desired area to cut out.
This can be any shapely geometry type,
but will most likely be a polygon
work_dir : str or pathlib.Path
A directory where temporary files can be written to.
Callers must create and manage this temporary directory,
perhaps using :obj:`tempfile.TemporaryDirectory`.
buffer : int, optional
If set to a positive integer,
a buffer of that many cells will be added around the clip region.
This is useful if you need to clip to a particular area,
but also would like to do some interpolation on the output cells.
Returns
-------
xarray.Dataset
A new :class:`~xarray.Dataset` clipped using the mask
"""
mask = self.make_clip_mask(clip_geomery)
mask = self.make_clip_mask(clip_geomery, buffer=buffer)
return self.apply_clip_mask(mask, work_dir=work_dir)

def to_netcdf(self, path: Pathish, **kwargs: Any) -> None:
Expand Down
9 changes: 7 additions & 2 deletions src/emsarray/formats/arakawa_c.py
Expand Up @@ -344,7 +344,11 @@ def make_linear(self, data_array: xr.DataArray) -> xr.DataArray:
dimensions = [topology.j_dimension, topology.i_dimension]
return utils.linearise_dimensions(data_array, list(dimensions))

def make_clip_mask(self, clip_geometry: BaseGeometry) -> xr.Dataset:
def make_clip_mask(
self,
clip_geometry: BaseGeometry,
buffer: int = 0,
) -> xr.Dataset:
"""
Generate an xarray.Dataset with a mask for the cell centres, left and
back faces, and nodes. The mask values will be True where the
Expand All @@ -361,7 +365,8 @@ def make_clip_mask(self, clip_geometry: BaseGeometry) -> xr.Dataset:
face_mask.ravel()[intersecting_indices] = True

# Expand the mask by one cell around the clipped region, as a buffer
face_mask = masking.blur_mask(face_mask)
if buffer > 0:
face_mask = masking.blur_mask(face_mask, size=buffer)

# Complete the rest of the mask
return c_mask_from_centres(face_mask, self._dimensions_for_grid_kind, self.dataset.coords)
Expand Down
13 changes: 9 additions & 4 deletions src/emsarray/formats/grid.py
Expand Up @@ -253,17 +253,22 @@ def make_linear(self, data_array: xr.DataArray) -> xr.DataArray:
surface_dims = [self.topology.y_dimension, self.topology.x_dimension]
return utils.linearise_dimensions(data_array, surface_dims)

def make_clip_mask(self, clip_geometry: BaseGeometry) -> xr.Dataset:
def make_clip_mask(
self,
clip_geometry: BaseGeometry,
buffer: int = 0,
) -> xr.Dataset:
topology = self.topology

intersecting_indices = [
item.linear_index
for polygon, item in self.spatial_index.query(clip_geometry)
if polygon.intersects(clip_geometry)]
intersections = np.full(topology.shape, fill_value=False)
intersections.ravel()[intersecting_indices] = True
mask = np.full(topology.shape, fill_value=False)
mask.ravel()[intersecting_indices] = True

mask = masking.blur_mask(intersections)
if buffer > 0:
mask = masking.blur_mask(mask, size=buffer)
dimensions = [topology.y_dimension, topology.x_dimension]

return xr.Dataset(
Expand Down
17 changes: 11 additions & 6 deletions src/emsarray/formats/ugrid.py
Expand Up @@ -899,7 +899,11 @@ def make_linear(self, data_array: xr.DataArray) -> xr.DataArray:
grid_dimension = self.topology.dimension_for_grid_kind[grid_kind]
return utils.linearise_dimensions(data_array, [grid_dimension])

def make_clip_mask(self, clip_geometry: BaseGeometry) -> xr.Dataset:
def make_clip_mask(
self,
clip_geometry: BaseGeometry,
buffer: int = 0,
) -> xr.Dataset:
"""
Make a mask dataset from a clip geometry for this dataset.
This mask can later be applied using :meth:`apply_clip_mask`.
Expand All @@ -921,19 +925,20 @@ def make_clip_mask(self, clip_geometry: BaseGeometry) -> xr.Dataset:
"""
# Find all faces that intersect the clip geometry
logger.info("Making clip mask")
intersecting_face_indices = np.array([
face_indices = np.array([
item.linear_index
for polygon, item in self.spatial_index.query(clip_geometry)
if polygon.intersects(clip_geometry)
])
logger.debug("Found %d intersecting faces, adding buffer...", len(intersecting_face_indices))
logger.debug("Found %d intersecting faces, adding size %d buffer...", len(face_indices), buffer)

# Include all the neighbours of the intersecting faces
included_face_indices = buffer_faces(intersecting_face_indices, self.topology)
logger.debug("Total faces in mask: %d", len(included_face_indices))
for _ in range(buffer):
face_indices = buffer_faces(face_indices, self.topology)
logger.debug("Total faces in mask: %d", len(face_indices))

# Make a mask dataset
return mask_from_face_indices(included_face_indices, self.topology)
return mask_from_face_indices(face_indices, self.topology)

def apply_clip_mask(self, clip_mask: xr.Dataset, work_dir: Pathish) -> xr.Dataset:
"""
Expand Down
22 changes: 17 additions & 5 deletions src/emsarray/masking.py
Expand Up @@ -242,7 +242,7 @@ def smear_mask(arr: np.ndarray, pad_axes: List[bool]) -> np.ndarray:
return functools.reduce(operator.or_, (np.pad(arr, pad) for pad in paddings))


def blur_mask(arr: np.ndarray) -> np.ndarray:
def blur_mask(arr: np.ndarray, size: int = 1) -> np.ndarray:
"""
Take a boolean numpy array and blur it, such that all indices neighbouring
a True value in the input array are True in the output array. The output
Expand All @@ -261,8 +261,20 @@ def blur_mask(arr: np.ndarray) -> np.ndarray:
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1]]
"""
return np.fromfunction(
np.vectorize(lambda *indices: arr[tuple(np.s_[max(i - 1, 0):i + 2] for i in indices)].any()),
arr.shape,
dtype=int,
# Pad the mask with a `size` sized buffer.
# This allows simple slicing to pull out a rectangular region around an
# index, without worrying about the edges of the array.
padded = np.pad(arr, size, constant_values=False)

# For each cell in the original mask shape,
# the blurred mask is true if the original mask was true,
# or any cells in a `size` sized slice around the original cell.
arr_iter = np.nditer(arr, ['multi_index'])
indices = (arr_iter.multi_index for _ in arr_iter)
values = (
arr[index] or np.any(padded[tuple(slice(i, i + size * 2 + 1) for i in index)])
for index in indices
)

arr = np.fromiter(values, count=arr.size, dtype=arr.dtype).reshape(arr.shape)
return cast(np.ndarray, arr)
8 changes: 7 additions & 1 deletion tests/formats/test_base.py
Expand Up @@ -88,7 +88,13 @@ def polygons(self) -> np.ndarray:
for x in range(self.shape[1])
], dtype=object)

def make_clip_mask(self, clip_geometry: BaseGeometry) -> xr.Dataset:
def make_clip_mask(
self,
clip_geometry: BaseGeometry,
buffer: int = 0,
) -> xr.Dataset:
if buffer > 0:
raise ValueError("Can not buffer SimpleFormat clip masks")
intersections = [
item.linear_index
for polygon, item in self.spatial_index.query(clip_geometry)
Expand Down
30 changes: 24 additions & 6 deletions tests/formats/test_cfgrid1d.py
Expand Up @@ -242,14 +242,32 @@ def test_make_clip_mask():
clip_geometry = Polygon([
(0.18, .30), (.40, .30), (.60, .51), (.60, .64), (.18, .64), (.18, .30),
])

mask = helper.make_clip_mask(clip_geometry)
expected_cells = mask_from_strings([
"0000000000",
"0000000000",
"0000000000",
"0011100000",
"0011110000",
"0011111000",
"0011111000",
"0000000000",
"0000000000",
"0000000000",
"0000000000",
])
assert_equal(mask.data_vars['cell_mask'].values, expected_cells)

assert mask.attrs == {'type': 'CFGrid mask'}
assert mask.dims == {
topology.longitude_name: topology.longitude.size,
topology.latitude_name: topology.latitude.size,
}
assert list(mask.data_vars.keys()) == ['cell_mask']

# Test adding a buffer also
mask = helper.make_clip_mask(clip_geometry, buffer=1)
expected_cells = mask_from_strings([
"0000000000",
"0000000000",
Expand Down Expand Up @@ -285,12 +303,12 @@ def test_apply_clip_mask(tmp_path):
assert set(dataset.dims.keys()) == set(clipped.dims.keys())

# Check that the new topology seems reasonable
assert clipped.ems.topology.longitude.size == 7
assert clipped.ems.topology.latitude.size == 6
assert clipped.ems.topology.longitude.size == 5
assert clipped.ems.topology.latitude.size == 4

# Check that the data were preserved, beyond being clipped
def clip_values(values: np.ndarray) -> np.ndarray:
values = values[..., 2:8, 1:8].copy()
values = values[..., 3:7, 2:7].copy()
values[..., 0, -2:] = np.nan
values[..., 1, -1:] = np.nan
return values
Expand All @@ -300,10 +318,10 @@ def clip_values(values: np.ndarray) -> np.ndarray:
assert_equal(clipped.data_vars['temp'].values, clip_values(dataset.data_vars['temp'].values))

# Check that the new geometry matches the relevant polygons in the old geometry
assert len(clipped.ems.polygons) == 6 * 7
assert len(clipped.ems.polygons) == 5 * 4
original_polys = np.concatenate([
dataset.ems.polygons[(i * 10 + 1):(i * 10 + 8)]
for i in range(2, 8)
dataset.ems.polygons[(i * 10 + 2):(i * 10 + 7)]
for i in range(3, 7)
], axis=None)
assert len(clipped.ems.polygons) == len(original_polys)
for original_poly, clipped_poly in zip(original_polys, clipped.ems.polygons):
Expand Down

0 comments on commit 55a014d

Please sign in to comment.