Skip to content

Commit

Permalink
Merge 76b1922 into 2d91a07
Browse files Browse the repository at this point in the history
  • Loading branch information
snowman2 committed Nov 1, 2019
2 parents 2d91a07 + 76b1922 commit 34366d9
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 7 deletions.
66 changes: 59 additions & 7 deletions rioxarray/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

import os
import re
import warnings
from collections import OrderedDict
from distutils.version import LooseVersion
Expand Down Expand Up @@ -194,14 +195,54 @@ def parsevec(s):
return parsed_tags


def _build_name_query(names):
    """
    Build a non-capturing regex alternation from one or more literal names.

    Parameters
    ----------
    names: str or list or tuple or None
        Name or names to match literally.

    Returns
    -------
    str or None: the alternation, or None when ``names`` is None or empty
        (i.e. there is nothing to filter by).
    """
    if not names:
        return None
    if not isinstance(names, (tuple, list)):
        names = [names]
    # Escape each name so regex metacharacters in it are matched literally.
    alternatives = "|".join(re.escape(name) for name in names)
    return rf"(?:{alternatives})"


def build_subdataset_filter(group_names, variable_names):
    """
    Build a compiled regular expression that selects subdataset names
    by group and/or variable.

    Example subdataset name::

        'HDF4_EOS:EOS_GRID:"./modis/MOD09GQ.A2017290.h11v04.006.NRT.hdf":
        MODIS_Grid_2D:sur_refl_b01_1'

    Parameters
    ----------
    group_names: str or list or tuple
        Name or names of netCDF groups to filter by. None or an empty
        value means the group is not filtered on.
    variable_names: str or list or tuple
        Name or names of netCDF variables to filter by. None or an empty
        value means any variable name made of word characters is accepted.

    Returns
    -------
    re.Pattern: output of re.compile()
    """
    # Everything up to the last ":" or ":/" separator, plus optional extra
    # slashes (e.g. "://"), is treated as the driver/file prefix.
    pattern_parts = [r".*(?:\:/|\:)(/+)?"]

    group_query = _build_name_query(group_names)
    if group_query is not None:
        # The group is separated from the variable by ":" or "/",
        # optionally followed by additional slashes.
        pattern_parts.extend([group_query, r"[:/](/+)?"])

    # An empty filter falls back to "any word-like name" rather than
    # compiling to an empty alternation that could never match a real name.
    pattern_parts.append(_build_name_query(variable_names) or r"\w+")

    # The variable name must be the last component of the subdataset name.
    pattern_parts.append(r"$")
    return re.compile("".join(pattern_parts))


def open_rasterio(
filename,
parse_coordinates=None,
chunks=None,
cache=None,
lock=None,
masked=False,
**open_kwargs
variable=None,
group=None,
**open_kwargs,
):
"""Open a file with rasterio (experimental).
Expand All @@ -223,32 +264,36 @@ def open_rasterio(
Parameters
----------
filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
filename: str, rasterio.DatasetReader, or rasterio.WarpedVRT
Path to the file to open. Or already open rasterio dataset.
parse_coordinates : bool, optional
parse_coordinates: bool, optional
Whether to parse the x and y coordinates out of the file's
``transform`` attribute or not. The default is to automatically
parse the coordinates only if they are rectilinear (1D).
It can be useful to set ``parse_coordinates=False``
if your files are very large or if you don't need the coordinates.
chunks : int, tuple or dict, optional
chunks: int, tuple or dict, optional
Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the new
DataArray into a dask array. Chunks can also be set to
``True`` or ``"auto"`` to choose sensible chunk sizes according to
``dask.config.get("array.chunk-size")``.
cache : bool, optional
cache: bool, optional
If True, cache data loaded from the underlying datastore in memory as
NumPy arrays when accessed to avoid reading from the underlying data-
store multiple times. Defaults to True unless you specify the `chunks`
argument to use dask, in which case it defaults to False.
lock : False, True or threading.Lock, optional
lock: False, True or threading.Lock, optional
If chunks is provided, this argument is passed on to
:py:func:`dask.array.from_array`. By default, a global lock is
used to avoid issues with concurrent access to the same file when using
dask's multithreaded backend.
masked : bool, optional
masked: bool, optional
If True, read the mask and set values to NaN. Defaults to False.
variable: str or list or tuple, optional
Variable name or names to use to filter loading.
group: str or list or tuple, optional
Group name or names to use to filter loading.
**open_kwargs: kwargs, optional
Optional keyword arguments to pass into rasterio.open().
Expand Down Expand Up @@ -293,8 +338,15 @@ def open_rasterio(

# open the subdatasets if they exist
if riods.subdatasets:
subdataset_filter = None
if any((group, variable)):
subdataset_filter = build_subdataset_filter(group, variable)
data_arrays = {}
for iii, subdataset in enumerate(riods.subdatasets):
if subdataset_filter is not None and not subdataset_filter.match(
subdataset
):
continue
rioda = open_rasterio(
subdataset,
parse_coordinates=iii == 0 and parse_coordinates,
Expand Down
4 changes: 4 additions & 0 deletions sphinx/history.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
History
=======

0.0.15
------
- Add `variable` and `group` kwargs to `rioxarray.open_rasterio()` to allow filtering of subdatasets.

0.0.14
------
- Add `windowed` kwarg to `rio.to_raster()` to write to raster using windowed writing (pull #54)
Expand Down
148 changes: 148 additions & 0 deletions test/integration/test_integration__io.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,161 @@
from xarray.testing import assert_allclose, assert_equal, assert_identical

import rioxarray
from rioxarray._io import build_subdataset_filter
from test.conftest import (
TEST_COMPARE_DATA_DIR,
TEST_INPUT_DATA_DIR,
_assert_xarrays_equal,
)


# Subdataset name fixtures shared by the parametrized cases below.
_PLANET_SCOPE_NC = "netcdf:../../test/test_data/input/PLANET_SCOPE_3D.nc"
_MODIS_HDF = 'HDF4_EOS:EOS_GRID:"./modis/MOD09GQ.A2017290.h11v04.006.NRT.hdf"'
# Group and variable separated by plain colons.
_MODIS_COLON = f"{_MODIS_HDF}:MODIS_Grid_2D:sur_refl_b01_1"
# Group and variable separated by "://".
_MODIS_SLASHES = f"{_MODIS_HDF}://MODIS_Grid_2D://sur_refl_b01_1"
# Group and variable written as a slash-separated path.
_S5P_PRODUCT = (
    "netcdf:S5P_NRTI_L2__NO2____20190513T181819_20190513T182319"
    "_08191_01_010301_20190513T185033.nc:/PRODUCT/tm5_constant_a"
)


@pytest.mark.parametrize(
    "subdataset, variable, group, match",
    [
        # variable-only filtering on a flat netCDF file
        (f"{_PLANET_SCOPE_NC}:blue", "green", None, False),
        (f"{_PLANET_SCOPE_NC}:blue", "blue", None, True),
        # the variable name must match exactly, not as a prefix or suffix
        (f"{_PLANET_SCOPE_NC}:blue1", "blue", None, False),
        (f"{_PLANET_SCOPE_NC}:1blue", "blue", None, False),
        # a group filter rejects names that do not contain that group
        (f"{_PLANET_SCOPE_NC}:blue", "blue", "gr", False),
        # colon-separated group and variable; str, list and tuple inputs
        (_MODIS_COLON, ["sur_refl_b01_1"], None, True),
        (_MODIS_COLON, None, ["MODIS_Grid_2D"], True),
        (_MODIS_COLON, ("sur_refl_b01_1",), ("MODIS_Grid_2D",), True),
        (_MODIS_COLON, "blue", "gr", False),
        (_MODIS_COLON, "sur_refl_b01_1", "gr", False),
        (_MODIS_COLON, None, "gr", False),
        # "://"-separated group and variable
        (_MODIS_SLASHES, "sur_refl_b01_1", None, True),
        (_MODIS_SLASHES, None, "MODIS_Grid_2D", True),
        (_MODIS_SLASHES, "sur_refl_b01_1", "MODIS_Grid_2D", True),
        (_MODIS_SLASHES, "blue", "gr", False),
        (_MODIS_SLASHES, "sur_refl_b01_1", "gr", False),
        (_MODIS_SLASHES, None, "gr", False),
        # slash-separated path, with and without a leading slash on the group
        (_S5P_PRODUCT, None, "PRODUCT", True),
        (_S5P_PRODUCT, "tm5_constant_a", "PRODUCT", True),
        (_S5P_PRODUCT, "tm5_constant_a", "/PRODUCT", True),
    ],
)
def test_build_subdataset_filter(subdataset, variable, group, match):
    """The filter built from ``group``/``variable`` accepts ``subdataset``
    exactly when ``match`` is True."""
    subdataset_filter = build_subdataset_filter(group, variable)
    # Use ``match`` (anchored at the start), the same call open_rasterio
    # makes when it applies the filter, so the test exercises real usage.
    assert (subdataset_filter.match(subdataset) is not None) == match


def test_open_variable_filter():
    """Opening with ``variable=["blue"]`` yields only the ``blue`` data variable."""
    input_path = os.path.join(TEST_INPUT_DATA_DIR, "PLANET_SCOPE_3D.nc")
    with rioxarray.open_rasterio(input_path, variable=["blue"]) as rds:
        loaded_variables = list(rds.data_vars)
        assert loaded_variables == ["blue"]


def test_open_group_filter():
    """Filtering by a group that is not in the file yields no data variables."""
    input_path = os.path.join(TEST_INPUT_DATA_DIR, "PLANET_SCOPE_3D.nc")
    filter_kwargs = {"variable": "blue", "group": ["non-existent"]}
    with rioxarray.open_rasterio(input_path, **filter_kwargs) as rds:
        loaded_variables = list(rds.data_vars)
        assert loaded_variables == []


def test_open_rasterio_mask_chunk_clip():
with rioxarray.open_rasterio(
os.path.join(TEST_COMPARE_DATA_DIR, "small_dem_3m_merged.tif"),
Expand Down

0 comments on commit 34366d9

Please sign in to comment.