Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,36 +79,38 @@ There are three "environments" set up for pixi:
- `dev`
- `examples`

And two "tasks":
And three "tasks":

- `lint`
- `test`
- `test` : run most of the tests
- `test_all` : run the tests that access AWS -- i.e. download data directly.

To run the tests in an isolated environment:

```bash
pixi run -e dev test
```

To run a shell to do dev work:

Or with a specific python version:
```bash
pixi shell -e dev
pixi run -e test312 test
```

If you want to run the examples (notebooks and all that):
Options are: `test310` `test311` `test312` `test313`


To run a shell to do dev work:

```bash
pixi shell -e all
pixi shell -e dev
```

That will set up a conda environment with all the development dependencies.

To run a shell in which you can run the examples:
To run a shell in which you can run the examples (notebooks and all that):

```bash
pixi shell -e examples
```

To run a shell with everything (dev and example deps):

```bash
Expand Down
5,950 changes: 3,012 additions & 2,938 deletions pixi.lock

Large diffs are not rendered by default.

21 changes: 11 additions & 10 deletions profiling/STOFS_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import xarray as xr
import xugrid as xu

bucket_name = 'noaa-gestofs-pds'
key = '_para2/stofs_2d_glo.20230819/stofs_2d_glo.t00z.fields.cwl.nc'
bucket_name = "noaa-gestofs-pds"
key = "_para2/stofs_2d_glo.20230819/stofs_2d_glo.t00z.fields.cwl.nc"
url = f"s3://{bucket_name}/{key}"

fs = fsspec.filesystem("s3", anon=True)
Expand All @@ -16,30 +16,31 @@
fs.open(url),
chunks={},
drop_variables=["neta", "nvel", "max_nvdll", "max_nvell"],
engine='h5netcdf'
engine="h5netcdf",
)

bbox = (-70, 40, -60, 50)


def info(ds):
print(f'Data Variables: {ds.data_vars}')
print(f'Coordinates: {ds.coords}')
print(f'Dimensions: {ds.dims}')
print(f'Attributes: {ds.attrs}\n')
print(f"Data Variables: {ds.data_vars}")
print(f"Coordinates: {ds.coords}")
print(f"Dimensions: {ds.dims}")
print(f"Attributes: {ds.attrs}\n")


print("Subsetting methods comparison")
# Time each library's bounding-box subset of the same dataset.
# NOTE: `time.time` must be *called*; subtracting a float from the function
# object itself raises TypeError.
start = time.time()
ads = ds.xsg.grid.subset_bbox(ds, bbox)
print(f"Xarray-subset-grid - {time.time() - start} sec")

start = time.time()
tds = thalassa.normalize(ds)
tds = thalassa.crop(tds, shapely.box(*bbox))
print(f"Thalassa - {time.time() - start} sec")

# Checking time only for subsetting operation
uds = xu.UgridDataset(ds)
start = time.time()
# bbox is (min_x, min_y, max_x, max_y) -- the same order shapely.box() takes
# above -- so x spans bbox[0]..bbox[2] and y spans bbox[1]..bbox[3].
uds = uds.ugrid.sel(x=slice(bbox[0], bbox[2]), y=slice(bbox[1], bbox[3]))
print(f"UGrid - {time.time() - start} sec")
13 changes: 11 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,19 @@ write_to = "xarray_subset_grid/_version.py"

[tool.ruff]
builtins = ["ellipsis"]
extend-exclude = ["xarray_subset_grid/_version.py"]
extend-exclude = ["xarray_subset_grid/_version.py", "docs"]
target-version = "py310"
# Use a longer line length.
line-length = 100

[tool.ruff.format]
# Prefer double quotes over single quotes.
quote-style = "double"
# Use `\n` line endings for all files
line-ending = "lf"
exclude = ["*.ipynb"]


[tool.ruff.lint]
ignore = [
"E402", # module level import not at top of file
Expand Down Expand Up @@ -138,7 +146,8 @@ python-build = "*"

[tool.pixi.feature.dev.tasks]
lint = "ruff check tests xarray_subset_grid"
test = "pytest --online tests/"
test = "pytest tests/"
test_all = "pytest --online tests/"

[tool.pixi.feature.examples.dependencies]
matplotlib = "*"
Expand Down
3 changes: 1 addition & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ def pytest_addoption(parser):
def pytest_configure(config):
# register an additional marker
config.addinivalue_line(
"markers",
"online: mark test to run only when online (using AWS resources)"
"markers", "online: mark test to run only when online (using AWS resources)"
)


Expand Down
6 changes: 3 additions & 3 deletions tests/test_grids/test_regular_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from xarray_subset_grid.grids.regular_grid import RegularGrid

TEST_DATA = Path(__file__).parent.parent / 'example_data'
TEST_DATA = Path(__file__).parent.parent / "example_data"

TEST_FILE1 = TEST_DATA / "AMSEAS-subset.nc"

Expand All @@ -23,6 +23,7 @@

"""


def test_recognise():
"""
works for at least one file ...
Expand All @@ -31,6 +32,7 @@ def test_recognise():

assert RegularGrid.recognize(ds)


def test_recognise_not():
"""
should not recognise an SGrid
Expand All @@ -40,8 +42,6 @@ def test_recognise_not():
assert not RegularGrid.recognize(ds)




#######
# These are from the ugrid tests -- need to be adapted
#######
Expand Down
110 changes: 58 additions & 52 deletions tests/test_grids/test_sgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,44 +11,55 @@
# fsspec and zarr are optional: they are only needed by the tests marked
# "online".  Both names used later at module level (`fsspec` and
# `zarr__version__`) must be bound on every path, because the
# `skipif(zarr__version__ >= 3, ...)` decorator is evaluated at import time.
try:
    import fsspec
    import zarr

    # Major version only, e.g. "3.0.8" -> 3
    zarr__version__ = int(zarr.__version__.split(".")[0])
except ImportError:
    fsspec = None
    # Sentinel major version: keeps the skipif condition False so the online
    # test itself reports the missing dependency instead of a NameError
    # breaking collection of the whole module.
    zarr__version__ = 0


test_dir = Path(__file__).parent.parent / 'example_data'
test_dir = Path(__file__).parent.parent / "example_data"

sample_sgrid_file = test_dir / "arakawa_c_test_grid.nc"

sample_sgrid_file = test_dir / 'arakawa_c_test_grid.nc'

def test_grid_topology_location_parse():
ds = xr.open_dataset(sample_sgrid_file, decode_times=False)
node_info = _get_location_info_from_topology(ds['grid'], 'node')
edge1_info = _get_location_info_from_topology(ds['grid'], 'edge1')
edge2_info = _get_location_info_from_topology(ds['grid'], 'edge2')
face_info = _get_location_info_from_topology(ds['grid'], 'face')

assert node_info == {'dims': ['xi_psi', 'eta_psi'],
'coords': ['lon_psi', 'lat_psi'],
'padding': {'xi_psi': 'none', 'eta_psi': 'none'}}
assert edge1_info == {'dims': ['xi_u', 'eta_u'],
'coords': ['lon_u', 'lat_u'],
'padding': {'eta_u': 'both', 'xi_u': 'none'}}
assert edge2_info == {'dims': ['xi_v', 'eta_v'],
'coords': ['lon_v', 'lat_v'],
'padding': {'xi_v': 'both', 'eta_v': 'none'}}
assert face_info == {'dims': ['xi_rho', 'eta_rho'],
'coords': ['lon_rho', 'lat_rho'],
'padding': {'xi_rho': 'both', 'eta_rho': 'both'}}


@pytest.mark.skipif(zarr__version__>=3,
reason="zarr3.0.8 doesn't support FSpec AWS (it might soon)")
node_info = _get_location_info_from_topology(ds["grid"], "node")
edge1_info = _get_location_info_from_topology(ds["grid"], "edge1")
edge2_info = _get_location_info_from_topology(ds["grid"], "edge2")
face_info = _get_location_info_from_topology(ds["grid"], "face")

assert node_info == {
"dims": ["xi_psi", "eta_psi"],
"coords": ["lon_psi", "lat_psi"],
"padding": {"xi_psi": "none", "eta_psi": "none"},
}
assert edge1_info == {
"dims": ["xi_u", "eta_u"],
"coords": ["lon_u", "lat_u"],
"padding": {"eta_u": "both", "xi_u": "none"},
}
assert edge2_info == {
"dims": ["xi_v", "eta_v"],
"coords": ["lon_v", "lat_v"],
"padding": {"xi_v": "both", "eta_v": "none"},
}
assert face_info == {
"dims": ["xi_rho", "eta_rho"],
"coords": ["lon_rho", "lat_rho"],
"padding": {"xi_rho": "both", "eta_rho": "both"},
}


@pytest.mark.skipif(
zarr__version__ >= 3, reason="zarr3.0.8 doesn't support FSpec AWS (it might soon)"
)
@pytest.mark.online
def test_polygon_subset():
'''
"""
This is a basic integration test for the subsetting of a ROMS sgrid dataset using
a polygon.
'''
"""
if fsspec is None:
raise ImportError("Must have fsspec installed to run --online tests")
fs = fsspec.filesystem(
Expand All @@ -61,9 +72,7 @@ def test_polygon_subset():
)
m = fs.get_mapper("")

ds = xr.open_dataset(
m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={}
)
ds = xr.open_dataset(m, engine="zarr", backend_kwargs=dict(consolidated=False), chunks={})

polygon = np.array(
[
Expand All @@ -77,46 +86,43 @@ def test_polygon_subset():
[-122.38488806417945, 34.98888604471138],
]
)
ds_temp = ds.xsg.subset_vars(['temp','u', 'v'])
ds_temp = ds.xsg.subset_vars(["temp", "u", "v"])
ds_subset = ds_temp.xsg.subset_polygon(polygon)

# Check that the subset dataset has the correct dimensions given the original padding
assert ds_subset.sizes['eta_rho'] == ds_subset.sizes['eta_psi'] + 1
assert ds_subset.sizes['eta_u'] == ds_subset.sizes['eta_psi'] + 1
assert ds_subset.sizes['eta_v'] == ds_subset.sizes['eta_psi']
assert ds_subset.sizes['xi_rho'] == ds_subset.sizes['xi_psi'] + 1
assert ds_subset.sizes['xi_u'] == ds_subset.sizes['xi_psi']
assert ds_subset.sizes['xi_v'] == ds_subset.sizes['xi_psi'] + 1
assert ds_subset.sizes["eta_rho"] == ds_subset.sizes["eta_psi"] + 1
assert ds_subset.sizes["eta_u"] == ds_subset.sizes["eta_psi"] + 1
assert ds_subset.sizes["eta_v"] == ds_subset.sizes["eta_psi"]
assert ds_subset.sizes["xi_rho"] == ds_subset.sizes["xi_psi"] + 1
assert ds_subset.sizes["xi_u"] == ds_subset.sizes["xi_psi"]
assert ds_subset.sizes["xi_v"] == ds_subset.sizes["xi_psi"] + 1

# Check that the subset rho/psi/u/v positional relationship makes sense, i.e. a psi point
# lies 'between' its neighboring rho points
# Note that this needs to be better generalized; it's not trivial to write a test that
# works in all potential cases.
assert (ds_subset['lon_rho'][0,0] < ds_subset['lon_psi'][0,0]
and ds_subset['lon_rho'][0,1] > ds_subset['lon_psi'][0,0])
assert (
ds_subset["lon_rho"][0, 0] < ds_subset["lon_psi"][0, 0]
and ds_subset["lon_rho"][0, 1] > ds_subset["lon_psi"][0, 0]
)

# ds_subset.temp_sur.isel(ocean_time=0).plot(x="lon_rho", y="lat_rho")


def test_polygon_subset_2():
ds = xr.open_dataset(sample_sgrid_file, decode_times=False)
polygon = np.array([
[6.5, 37.5],
[6.5, 39.5],
[9.5, 40.5],
[8.5, 37.5],
[6.5, 37.5]
])
polygon = np.array([[6.5, 37.5], [6.5, 39.5], [9.5, 40.5], [8.5, 37.5], [6.5, 37.5]])
ds_subset = ds.xsg.subset_polygon(polygon)

#Check that the subset dataset has the correct dimensions given the original padding
assert ds_subset.sizes['eta_rho'] == ds_subset.sizes['eta_psi'] + 1
assert ds_subset.sizes['eta_u'] == ds_subset.sizes['eta_psi'] + 1
assert ds_subset.sizes['eta_v'] == ds_subset.sizes['eta_psi']
assert ds_subset.sizes['xi_rho'] == ds_subset.sizes['xi_psi'] + 1
assert ds_subset.sizes['xi_u'] == ds_subset.sizes['xi_psi']
assert ds_subset.sizes['xi_v'] == ds_subset.sizes['xi_psi'] + 1
# Check that the subset dataset has the correct dimensions given the original padding
assert ds_subset.sizes["eta_rho"] == ds_subset.sizes["eta_psi"] + 1
assert ds_subset.sizes["eta_u"] == ds_subset.sizes["eta_psi"] + 1
assert ds_subset.sizes["eta_v"] == ds_subset.sizes["eta_psi"]
assert ds_subset.sizes["xi_rho"] == ds_subset.sizes["xi_psi"] + 1
assert ds_subset.sizes["xi_u"] == ds_subset.sizes["xi_psi"]
assert ds_subset.sizes["xi_v"] == ds_subset.sizes["xi_psi"] + 1

assert ds_subset.lon_psi.min() <= 6.5 and ds_subset.lon_psi.max() >= 9.5
assert ds_subset.lat_psi.min() <= 37.5 and ds_subset.lat_psi.max() >= 40.5

assert 'u' in ds_subset.variables.keys()
assert "u" in ds_subset.variables.keys()
Loading
Loading