Skip to content

Commit

Permalink
Use pint for units (#160)
Browse files Browse the repository at this point in the history
* use pint-xarray for unit conversion

* add to dependencies

* add packages to CI

* satisfy pre-commit

* enforce cf_xarray >= 0.6.0

* Update environment-upstream-dev.yml

* Update environment-cloud-test.yml

* Update environment-upstream-dev.yml

* Update preprocessing.py

* Update preprocessing.py

* Update environment.yml

* Update preprocessing.py

* Update test_preprocessing.py

* Update test_preprocessing.py

* Update preprocessing.py

* Remove assertions for warnings about missing units

* Update preprocessing.py

* [skip-ci]

* Add unit test to cloud tests

* Update test_preprocessing_cloud.py

* Fix cloud tests

* Added whats new

* control which arrays are going to be quantified.

* some minor changes

* Implemented overrides for `quantify`

* Update cmip6_preprocessing/preprocessing.py

Co-authored-by: keewis <keewis@users.noreply.github.com>

* Fix typos, add test for missing units attrs

* Catch error for missing units and warn

* Update preprocessing.py

* Install pint-xarray via pip

* Update environment-cloud-test.yml

* Ooops revert to conda

* Update environment.yml

* Update cmip6_preprocessing/preprocessing.py

Co-authored-by: keewis <keewis@users.noreply.github.com>

* Update cmip6_preprocessing/preprocessing.py

Co-authored-by: keewis <keewis@users.noreply.github.com>

* Update preprocessing.py

* Fix typo

* rebase use pint-xarray for unit conversion

* add to dependencies

* add packages to CI

* satisfy pre-commit

* enforce cf_xarray >= 0.6.0

* Update environment-upstream-dev.yml

* Update environment-cloud-test.yml

* Update environment-upstream-dev.yml

* Update preprocessing.py

* Update preprocessing.py

* Update environment.yml

* Update preprocessing.py

* Update test_preprocessing.py

* Update test_preprocessing.py

* Update preprocessing.py

* Remove assertions for warnings about missing units

* Update preprocessing.py

* [skip-ci]

* Add unit test to cloud tests

* Update test_preprocessing_cloud.py

* Fix cloud tests

* Added whats new

* control which arrays are going to be quantified.

* some minor changes

* Implemented overrides for `quantify`

* Update cmip6_preprocessing/preprocessing.py

Co-authored-by: keewis <keewis@users.noreply.github.com>

* Fix typos, add test for missing units attrs

* Catch error for missing units and warn

* Update preprocessing.py

* Install pint-xarray via pip

* Update environment-cloud-test.yml

* Ooops revert to conda

* Update environment.yml

* Update cmip6_preprocessing/preprocessing.py

Co-authored-by: keewis <keewis@users.noreply.github.com>

* Update cmip6_preprocessing/preprocessing.py

Co-authored-by: keewis <keewis@users.noreply.github.com>

* Update preprocessing.py

* Fix typo

* fix bug in test

* Fix formatting

Co-authored-by: Julius Busecke <julius@ldeo.columbia.edu>
Co-authored-by: keewis <keewis@users.noreply.github.com>
  • Loading branch information
3 people committed Aug 2, 2021
1 parent b1089b3 commit 44de62c
Show file tree
Hide file tree
Showing 10 changed files with 81 additions and 37 deletions.
3 changes: 3 additions & 0 deletions ci/environment-cloud-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ dependencies:
- intake-esm
- gcsfs
- zarr
- pint
- cf_xarray>=0.6.0
- pint-xarray
# Dependencies for the testing suite
- pytest-cov
- pytest-xdist
Expand Down
3 changes: 3 additions & 0 deletions ci/environment-upstream-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ dependencies:
- git+https://github.com/pydata/xarray.git
- git+https://github.com/xgcm/xgcm.git
- git+https://github.com/jbusecke/xarrayutils.git
- git+https://github.com/xarray-contrib/cf-xarray.git
- git+https://github.com/hgrecco/pint.git
- git+https://github.com/xarray-contrib/pint-xarray.git
3 changes: 3 additions & 0 deletions ci/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ dependencies:
- cartopy
- xesmf
- xarrayutils
- pint
- cf_xarray>=0.6.0
- pint-xarray>=0.2.1
- pytest-cov
- pytest-xdist
- codecov
52 changes: 28 additions & 24 deletions cmip6_preprocessing/preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
# Preprocessing for CMIP6 models
import warnings

import cf_xarray.units
import numpy as np
import pandas as pd
import pint
import pint_xarray
import xarray as xr

from cmip6_preprocessing.utils import _maybe_make_list
from cmip6_preprocessing.utils import _maybe_make_list, cmip6_dataset_id


# global object for units
_desired_units = {"lev": "m"}
_unit_overrides = {name: None for name in ["so"]}


_drop_coords = ["bnds", "vertex"]
Expand Down Expand Up @@ -205,30 +213,26 @@ def maybe_fix_non_unique(data, pad=False):
return ds


def unit_conversion_dict():
"""Units conversion database"""
unit_dict = {"m": {"centimeters": 1 / 100}}
return unit_dict

def correct_units(ds):
"Converts coordinates into SI units using pint-xarray"
# codify units with pint
# Perhaps this should be kept separately from the fixing?
# See https://github.com/jbusecke/cmip6_preprocessing/pull/160#discussion_r667041858
try:
# exclude salinity from the quantification (see https://github.com/jbusecke/cmip6_preprocessing/pull/160#issuecomment-878627027 for details)
quantified = ds.pint.quantify(_unit_overrides)
target_units = {
var: target_unit
for var, target_unit in _desired_units.items()
if var in quantified
}

def correct_units(ds, verbose=False, stric=False):
"Converts coordinates into SI units using `unit_conversion_dict`"
unit_dict = unit_conversion_dict()
ds = ds.copy()
# coordinate conversions
for co, expected_unit in [("lev", "m")]:
if co in ds.coords:
if "units" in ds.coords[co].attrs.keys():
unit = ds.coords[co].attrs["units"]
if unit != expected_unit:
if unit in unit_dict[expected_unit].keys():
factor = unit_dict[expected_unit][unit]
ds.coords[co] = ds.coords[co] * factor
ds.coords[co].attrs["units"] = expected_unit
else:
warnings.warn("No conversion found in unit_dict")
else:
warnings.warn(f'{ds.attrs["source_id"]}: No units found for {co}')
converted = quantified.pint.to(target_units)
ds = converted.pint.dequantify(format="~P")
except ValueError as e:
warnings.warn(
f"{cmip6_dataset_id(ds)}: Unit correction failed with: {e}", UserWarning
)
return ds


Expand Down
1 change: 1 addition & 0 deletions docs/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies:
- recommonmark
- pip
- nc-time-axis
- pint
- pip:
- docrep<=0.2.7
- nbsphinx
Expand Down
11 changes: 11 additions & 0 deletions docs/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,23 @@ Breaking Changes
~~~~~~~~~~~~~~~~
- Requires xarray>=0.17.0 and drops support for python 3.6 (:pull:`170`, :pull:`173`). By `Julius Busecke <https://github.com/jbusecke>`_

Internal Changes
~~~~~~~~~~~~~~~~

- Unit correction logic now uses pint-xarray under the hood (:pull:`160`, :issue:`31`).
By `Tom Nicholas <https://github.com/TomNicholas>`_ and `Julius Busecke <https://github.com/jbusecke>`_

Bugfixes
~~~~~~~~
- Fixes incompatibility with upstream changes in xarray>=0.19.0 (:issue:`173`, :pull:`174`). By `Julius Busecke <https://github.com/jbusecke>`_

- :py:func:`~cmip6_preprocessing.drift_removal.match_and_remove_drift` now works with chunked (dask powered) datasets (:pull:`164`). By `Julius Busecke <https://github.com/jbusecke>`_

Internal Changes
~~~~~~~~~~~~~~~~

- Unit correction logic now uses pint-xarray under the hood (:pull:`160`, :issue:`31`).
By `Tom Nicholas <https://github.com/TomNicholas>`_ and `Julius Busecke <https://github.com/jbusecke>`_


.. _whats-new.0.5.0:

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ quiet = false
color = true

[tool.isort]
known_third_party = ["cftime", "dask", "fsspec", "numpy", "pandas", "pkg_resources", "pytest", "setuptools", "xarray", "xarrayutils", "xesmf", "xgcm", "yaml"]
known_third_party = ["cf_xarray", "cftime", "dask", "fsspec", "numpy", "pandas", "pint", "pint_xarray", "pkg_resources", "pytest", "setuptools", "xarray", "xarrayutils", "xesmf", "xgcm", "yaml"]

[tool.pytest.ini_options]
minversion = "6.0"
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ install_requires =
xgcm
cftime
xarrayutils
pint
cf_xarray >= 0.6.0
pint-xarray
setup_requires=
setuptools
setuptools-scm
Expand Down
21 changes: 13 additions & 8 deletions tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,14 +288,18 @@ def test_correct_units():
ds_test = correct_units(ds)
assert ds_test.lev.attrs["units"] == "m"
np.testing.assert_allclose(ds_test.lev.data, ds.lev.data / 100.0)
with pytest.warns(UserWarning):
ds_no_units = ds.copy()
ds_no_units.lev.attrs = {}
correct_units(ds_no_units)
with pytest.warns(UserWarning):
ds_unknown_units = ds.copy()
ds_unknown_units.lev.attrs["units"] = "whatever"
correct_units(ds_unknown_units)


def test_correct_units_missing():
lev = np.arange(0, 200)
data = np.random.rand(*lev.shape)
ds = xr.DataArray(data, dims=["lev"], coords={"lev": lev}).to_dataset(name="test")
ds.attrs["source_id"] = "something"
# should this raise a warning but pass?
msg = "Unit correction failed with: Cannot convert variables"
with pytest.warns(UserWarning, match=msg):
ds_test = correct_units(ds)
assert "units" not in ds_test.lev.attrs.keys()


def test_maybe_convert_bounds_to_vertex():
Expand Down Expand Up @@ -478,6 +482,7 @@ def test_combined_preprocessing_mislabeled_coords():
create_test_ds("x", "y", "dummy", xlen, ylen, zlen).squeeze().drop_vars("dummy")
)
ds = ds.assign(depth=5.0)
ds.depth.attrs["units"] = "m" # otherwise pint complains.

ds_pp = combined_preprocessing(ds)
assert "lev" in ds_pp.coords
Expand Down
19 changes: 15 additions & 4 deletions tests/test_preprocessing_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
import xarray as xr

from cmip6_preprocessing.grids import combine_staggered_grid
from cmip6_preprocessing.preprocessing import _drop_coords, combined_preprocessing
from cmip6_preprocessing.preprocessing import (
_desired_units,
_drop_coords,
combined_preprocessing,
)
from cmip6_preprocessing.utils import google_cmip_col, model_id_match


Expand All @@ -28,9 +32,9 @@ def data(
):
zarr_kwargs = {
"consolidated": True,
"decode_times": False,
# "decode_times": True,
# "use_cftime": True,
# "decode_times": False,
"decode_times": True,
"use_cftime": True,
}

cat = google_cmip_col(catalog=catalog).search(
Expand Down Expand Up @@ -225,6 +229,13 @@ def test_check_dim_coord_values_wo_intake(
if co in ds.dims:
assert co not in ds.coords

## Check unit conversion
for var, expected_unit in _desired_units.items():
if var in ds.variables:
unit = ds[var].attrs.get("units")
if unit:
assert unit == expected_unit


# this fixture has to be redefined every time to account for different fail cases for each test
@pytest.fixture
Expand Down

0 comments on commit 44de62c

Please sign in to comment.