diff --git a/HOWTOCONTRIBUTE.rst b/HOWTOCONTRIBUTE.rst index a6478ff..f7719bc 100644 --- a/HOWTOCONTRIBUTE.rst +++ b/HOWTOCONTRIBUTE.rst @@ -4,7 +4,7 @@ Contribution Guide Contributions are highly welcomed and appreciated. Every little help counts, so do not hesitate! You can make a high impact on ``esmtools`` just by using it and -reporting `issues `_. +reporting `issues `__. The following sections cover some general guidelines regarding development in ``esmtools`` for maintainers and contributors. @@ -25,7 +25,7 @@ Feature requests and feedback We are eager to hear about your requests for new features and any suggestions about the API, infrastructure, and so on. Feel free to submit these as -`issues `_ with the label "feature request." +`issues `__ with the label "feature request." Please make sure to explain in detail how the feature should work and keep the scope as narrow as possible. This will make it easier to implement in small PRs. @@ -36,7 +36,7 @@ narrow as possible. This will make it easier to implement in small PRs. Report bugs ----------- -Report bugs for ``esmtools`` in the `issue tracker `_ +Report bugs for ``esmtools`` in the `issue tracker `__ with the label "bug". If you are reporting a bug, please include: @@ -68,12 +68,15 @@ Write documentation * More complementary documentation. Have you perhaps found something unclear? * Docstrings. There can never be too many of them. -* Example notebooks with different Earth System Models, lead times, etc. -- they're all very appreciated. You can also edit documentation files directly in the GitHub web interface, without using a local copy. This can be convenient for small fixes. -Our documentation is written in reStructuredText. You can follow our conventions in already written documents. Some helpful guides are located `here `_ and `here `_. +Our documentation is written in reStructuredText. You can follow our conventions in +already written documents. Some helpful guides are +located `here `__ +and +`here `__. .. note:: Build the documentation locally with the following command: @@ -86,9 +89,12 @@ Our documentation is written in reStructuredText. You can follow our conventions The built documentation should be available in the ``docs/build/``. -If you need to add new functions to the API, add the functions to ``api.rst`` then run ``sphinx-autogen -o api api.rst`` from the ``docs/source`` directory. You might need to run ``make clean`` from the ``docs/`` directory and then ``make html`` again to get the links to build properly. +If you need to add new functions to the API, add the functions to ``api.rst`` then +run ``sphinx-autogen -o api api.rst`` from the ``docs/source`` directory. You might +need to run ``make clean`` from the ``docs/`` directory and then ``make html`` again +to get the links to build properly. - .. _`pull requests`: +.. _`pull requests`: .. _pull-requests: Preparing Pull Requests @@ -123,23 +129,25 @@ Preparing Pull Requests $ pip install -e . -#. Install `pre-commit `_ and its hook on the ``esmtools`` repo:: +#. Install `pre-commit `_ and its hook on the ``esmtools`` + repo:: $ pip install --user pre-commit $ pre-commit install Afterwards ``pre-commit`` will run whenever you commit. - https://pre-commit.com/ is a framework for managing and maintaining multi-language pre-commit hooks - to ensure code-style and code formatting is consistent. + https://pre-commit.com/ is a framework for managing and maintaining multi-language + pre-commit hooks to ensure code-style and code formatting is consistent. 
Now you have an environment called ``esmtools-dev`` that you can work in. You’ll need to make sure to activate that environment next time you want to use it after closing the terminal or your system. - You can now edit your local working copy and run/add tests as necessary. Please follow - PEP-8 for naming. When committing, ``pre-commit`` will modify the files as needed, or - will generally be quite clear about what you need to do to pass the commit test. + You can now edit your local working copy and run/add tests as necessary. Please + follow PEP-8 for naming. When committing, ``pre-commit`` will modify the files as + needed, or will generally be quite clear about what you need to do to pass the + commit test. #. Break your edits up into reasonably sized commits. @@ -162,21 +170,26 @@ Preparing Pull Requests (``:pr:`#```) ````_`` - where ```` is the description of the PR related to the change and ```` is - the pull request number and ```` are your first and last names. + where ```` is the description of the PR related to the change and + ```` is the pull request number and ```` are + your first and last names. - - Add yourself to list of authors at the end of ``CHANGELOG.rst`` file if not there yet, in alphabetical order. + - Add yourself to list of authors at the end of ``CHANGELOG.rst`` file if not there + yet, in alphabetical order. - #. Add yourself to the `contributors _` list via ``docs/source/contributors.rst``. + #. Add yourself to the + `contributors _` + list via ``docs/source/contributors.rst``. - #. Finally, submit a pull request through the GitHub website using this data:: + #. Finally, submit a pull request through the GitHub website using this data - head-fork: YOUR_GITHUB_USERNAME/esmtools - compare: your-branch-name + ..code:: + head-fork: YOUR_GITHUB_USERNAME/esmtools + compare: your-branch-name - base-fork: bradyrx/esmtools - base: master + base-fork: bradyrx/esmtools + base: master -Note that you can create the Pull Request while you're working on this. The PR will update -as you add more commits. ``esmtools`` developers and contributors can then review your code -and offer suggestions. +Note that you can create the Pull Request while you're working on this. The PR will +update as you add more commits. ``esmtools`` developers and contributors can then +review your code and offer suggestions. diff --git a/docs/source/api.rst b/docs/source/api.rst index 11daf59..ef1ea2b 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -2,7 +2,8 @@ API Reference ============= This page provides an auto-generated summary of esmtools's API. -For more details and examples, refer to the relevant chapters in the main part of the documentation. +For more details and examples, refer to the relevant chapters in the main part of the +documentation. Carbon ------ @@ -16,16 +17,16 @@ Functions related to analyzing ocean (and perhaps terrestrial) biogeochemistry. .. autosummary:: :toctree: api/ + calculate_compatible_emissions co2_sol - schmidt - temp_decomp_takahashi + get_iam_emissions + plot_compatible_emissions potential_pco2 + schmidt spco2_sensitivity spco2_decomposition_index spco2_decomposition - calculate_compatible_emissions - get_iam_emissions - plot_compatible_emissions + temp_decomp_takahashi Composite Analysis ------------------ @@ -43,6 +44,20 @@ some field (e.g., sea surface temperature) when some climate index composite_analysis +Conversions +----------- + +``from esmtools.conversions import ...`` + +.. 
currentmodule:: esmtools.conversions + +Functions related to unit conversions. + +.. autosummary:: + :toctree: api/ + + convert_mpas_fgco2 + Grid Tools ---------- @@ -84,26 +99,24 @@ Functions related to spatial analysis. .. autosummary:: :toctree: api/ - find_indices extract_region + find_indices -Stats ------ +Statistics +---------- ``from esmtools.stats import ...`` .. currentmodule:: esmtools.stats -Statistical functions. A portion directly wrap functions from ``climpred``. +Functions dealing with statistics. .. autosummary:: :toctree: api/ ACF - area_weight autocorr corr - cos_weight linear_slope linregress polyfit @@ -112,31 +125,32 @@ Statistical functions. A portion directly wrap functions from ``climpred``. rm_trend standardize -Testing -------- +Temporal +-------- -``from esmtools.testing import ...`` +``from esmtools.temporal import ...`` -.. currentmodule:: esmtools.testing +.. currentmodule:: esmtools.temporal -Statistical tests. +Functions related to time. .. autosummary:: :toctree: api/ - ttest_ind_from_stats - multipletests + to_annual -Temporal --------- -``from esmtools.temporal import ...`` +Testing +------- -.. currentmodule:: esmtools.temporal +``from esmtools.testing import ...`` -Functions related to time. +.. currentmodule:: esmtools.testing + +Functions specifically focused on statistical testing. .. autosummary:: :toctree: api/ - to_annual + multipletests + ttest_ind_from_stats diff --git a/docs/source/api/esmtools.conversions.convert_mpas_fgco2.rst b/docs/source/api/esmtools.conversions.convert_mpas_fgco2.rst new file mode 100644 index 0000000..77c0b49 --- /dev/null +++ b/docs/source/api/esmtools.conversions.convert_mpas_fgco2.rst @@ -0,0 +1,6 @@ +esmtools.conversions.convert\_mpas\_fgco2 +========================================= + +.. currentmodule:: esmtools.conversions + +.. autofunction:: convert_mpas_fgco2 diff --git a/docs/source/api/esmtools.stats.area_weight.rst b/docs/source/api/esmtools.stats.area_weight.rst deleted file mode 100644 index 189d6a5..0000000 --- a/docs/source/api/esmtools.stats.area_weight.rst +++ /dev/null @@ -1,6 +0,0 @@ -esmtools.stats.area\_weight -=========================== - -.. currentmodule:: esmtools.stats - -.. autofunction:: area_weight diff --git a/docs/source/api/esmtools.stats.cos_weight.rst b/docs/source/api/esmtools.stats.cos_weight.rst deleted file mode 100644 index 2231a1d..0000000 --- a/docs/source/api/esmtools.stats.cos_weight.rst +++ /dev/null @@ -1,6 +0,0 @@ -esmtools.stats.cos\_weight -========================== - -.. currentmodule:: esmtools.stats - -.. autofunction:: cos_weight diff --git a/docs/source/index.rst b/docs/source/index.rst index c2b18db..7319ffc 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -33,6 +33,7 @@ You can install the latest release of ``esmtools`` using ``pip``: * :doc:`examples` * :doc:`accessors` + .. toctree:: :maxdepth: 1 :hidden: @@ -49,6 +50,7 @@ You can install the latest release of ``esmtools`` using ``pip``: * :doc:`release_procedure` * :doc:`contributors` * :doc:`additional_packages` + .. toctree:: :maxdepth: 1 :hidden: diff --git a/esmtools/carbon.py b/esmtools/carbon.py index d1659bc..08fa27a 100644 --- a/esmtools/carbon.py +++ b/esmtools/carbon.py @@ -10,6 +10,30 @@ from .stats import linregress, nanmean, rm_poly +def calculate_compatible_emissions(global_co2_flux, co2atm_forcing): + """Calculate compatible emissions. + + Args: + global_co2_flux (xr.object): global co2_flux in PgC/yr. + co2atm_forcing (xr.object): prescribed atm. CO2 forcing in ppm. 
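As a compact restatement (in illustrative symbols) of the single line computed in the function body below, with 2.12 PgC per ppm used as the conversion from atmospheric CO2 concentration to carbon mass, the relation is:

.. math::

    E_{\mathrm{compatible}}(t) = 2.12 \, \Delta \mathrm{CO}_{2,\mathrm{atm}}(t) - F_{\mathrm{CO_2}}^{\mathrm{global}}(t)

where :math:`\Delta \mathrm{CO}_{2,\mathrm{atm}}` is the time difference of ``co2atm_forcing`` (ppm) and :math:`F_{\mathrm{CO_2}}^{\mathrm{global}}` is ``global_co2_flux`` (PgC/yr), following the Jones et al. (2013) reference cited below.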
+ + Returns: + xr.object: compatible emissions in PgC/yr. + + Reference: + * Jones, Chris, Eddy Robertson, Vivek Arora, Pierre Friedlingstein, Elena + Shevliakova, Laurent Bopp, Victor Brovkin, et al. “Twenty-First-Century + Compatible CO2 Emissions and Airborne Fraction Simulated by CMIP5 Earth + System Models under Four Representative Concentration Pathways.” + Journal of Climate 26, no. 13 (February 1, 2013): 4398–4413. + https://doi.org/10/f44bbn. + + """ + compatible_emissions = co2atm_forcing.diff('time') * 2.12 - global_co2_flux + compatible_emissions.name = 'compatible_emissions' + return compatible_emissions + + @is_xarray([0, 1]) def co2_sol(t, s): """Compute CO2 solubility per the equation used in CESM. @@ -57,93 +81,111 @@ def sol_calc(t, s): return ff -@is_xarray(0) -def schmidt(t): - """Computes the dimensionless Schmidt number. - - .. note:: - The polynomials used are for SST ranges between 0 and 30C and a salinity of 35. - - Args: - t (xarray object): SST (degC) - - Return: - Sc (xarray object): Schmidt number (dimensionless) - - References: - Sarmiento and Gruber (2006). Ocean Biogeochemical Dynamics. Table 3.3.1 +def get_iam_emissions(): + """Download IAM emissions from PIK website. - Examples: - >>> from esmtools.carbon import schmidt - >>> import numpy as np - >>> import xarray as xr - >>> t = xr.DataArray(np.random.randint(10, 25, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']) - >>> Sc = schmidt(t) + Returns: + iam_emissions (xr.object): emissions from the IAMs in PgC/yr. """ + ds = [] + member = ['rcp26', 'rcp45', 'rcp85'] + for r in member: + if r == 'rcp26': + r = 'rcp3pd' + r = r.upper() + link = f'http://www.pik-potsdam.de/~mmalte/rcps/data/{r}_EMISSIONS.xls' + e = pd.read_excel(link, sheet_name=f'{r}_EMISSIONS', skiprows=35, header=2) + e = e.set_index(e.columns[0]) + e.index.name = 'Year' + ds.append(e[['FossilCO2', 'OtherCO2']].to_xarray()) + ds = xr.concat(ds, 'member') + ds = ds.sel(Year=slice(1850, 2100)).rename({'Year': 'time'}) + ds['member'] = member + ds['IAM_emissions'] = ds['FossilCO2'] + ds['OtherCO2'] + return ds['IAM_emissions'] - def calc_schmidt(t): - c = [2073.1, 125.62, 3.6276, 0.043219] - Sc = c[0] - c[1] * t + c[2] * (t ** 2) - c[3] * (t ** 3) - return Sc - - Sc = xr.apply_ufunc( - calc_schmidt, t, input_core_dims=[[]], vectorize=True, dask='allowed' - ) - return Sc - - -@is_xarray(0) -def temp_decomp_takahashi(ds, time_dim='time', temperature='tos', pco2='spco2'): - """Decompose surface pCO2 into thermal and non-thermal components. - .. note:: - This expects cmorized variable names. You can pass keywords to change that - or rename your variables accordingly. +def plot_compatible_emissions( + compatible_emissions, global_co2_flux, iam_emissions=None, ax=None +): + """Plot combatible emissions. Args: - ds (xarray.Dataset): Contains two variables: - * `tos` (sea surface temperature in degC) - * `spco2` (surface pCO2 in uatm) + compatible_emissions (xr.object): compatible_emissions in PgC/yr from + `calculate_compatible_emissions`. + global_co2_flux (xr.object): Global CO2 flux in PgC/yr. + iam_emissions (xr.object): (optional) Emissions from the IAMs in PgC/yr. + Defaults to None. + ax (plt.ax): (optional) matplotlib axis to plot onto. Defaults to None. - Return: - decomp (xr.Dataset): Decomposed thermal and non-thermal components. + Returns: + ax (plt.ax): matplotlib axis. References: - Takahashi, Taro, Stewart C. Sutherland, Colm Sweeney, Alain Poisson, Nicolas - Metzl, Bronte Tilbrook, Nicolas Bates, et al. 
“Global Sea–Air CO2 Flux - Based on Climatological Surface Ocean PCO2, and Seasonal Biological and - Temperature Effects.” Deep Sea Research Part II: Topical Studies in - Oceanography, The Southern Ocean I: Climatic Changes in the Cycle of - Carbon in the Southern Ocean, 49, no. 9 (January 1,2002): 1601–22. - https://doi.org/10/dmk4f2. + * Jones, Chris, Eddy Robertson, Vivek Arora, Pierre Friedlingstein, Elena + Shevliakova, Laurent Bopp, Victor Brovkin, et al. “Twenty-First-Century + Compatible CO2 Emissions and Airborne Fraction Simulated by CMIP5 Earth + System Models under Four Representative Concentration Pathways.” + Journal of Climate 26, no. 13 (February 1, 2013): 4398–4413. + https://doi.org/10/f44bbn. Fig. 5a + * IPCC AR5 Fig. 6.25 - Examples: - >>> from esmtools.carbon import temp_decomp_takahashi - >>> import numpy as np - >>> import xarray as xr - >>> t = xr.DataArray(np.random.randint(10, 25, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']).rename('tos') - >>> pco2 = xr.DataArray(np.random.randint(350, 400, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']).rename('spco2') - >>> ds = xr.merge([t, pco2]) - >>> decomp = temp_decomp_takahashi(ds) """ - if temperature not in ds.data_vars: - raise ValueError(f'{temperature} is not a variable in your dataset.') - if pco2 not in ds.data_vars: - raise ValueError(f'{pco2} is not a variable in your dataset.') + if ax is None: + fig, ax = plt.subplots(figsize=(10, 6)) + # hist + alpha = 0.1 + c = 'gray' + compatible_emissions.isel(member=0).sel( + time=slice(None, 2005) + ).to_dataframe().unstack()['compatible_emissions'].plot( + ax=ax, legend=False, color=c, alpha=alpha + ) + compatible_emissions.isel(member=0).sel(time=slice(None, 2005)).mean( + 'initialization' + ).plot(ax=ax, color='w', lw=3) + compatible_emissions.isel(member=0).sel(time=slice(None, 2005)).mean( + 'initialization' + ).plot(ax=ax, color=c, lw=2) + # rcps + colors = ['royalblue', 'orange', 'red'][::-1] + for i, m in enumerate(global_co2_flux.member.values[::-1]): + c = colors[i] + compatible_emissions.sel(member=m).sel( + time=slice(2005, None) + ).to_dataframe().unstack()['compatible_emissions'].plot( + ax=ax, legend=False, color=c, alpha=alpha + ) + compatible_emissions.sel(member=m).sel(time=slice(2005, None)).mean( + 'initialization' + ).plot(ax=ax, color='w', lw=3) + compatible_emissions.sel(member=m).sel(time=slice(2005, None)).mean( + 'initialization' + ).plot(ax=ax, color=c, lw=2) - fac = 0.0432 - tos_mean = ds[temperature].mean(time_dim) - tos_diff = ds[temperature] - tos_mean - thermal = (ds[pco2].mean(time_dim) * (np.exp(tos_diff * fac))).rename('thermal') - non_thermal = (ds[pco2] * (np.exp(tos_diff * -fac))).rename('non_thermal') - decomp = xr.merge([thermal, non_thermal]) - decomp.attrs[ - 'description' - ] = 'Takahashi decomposition of pCO2 into thermal and non-thermal components.' 
- return decomp + if iam_emissions is not None: + ls = (0, (5, 5)) + iam_emissions.isel(member=0).sel(time=slice(None, 2005)).plot( + ax=ax, color='white', lw=3 + ) + iam_emissions.isel(member=0).sel(time=slice(None, 2005)).plot( + ax=ax, color='gray', lw=2, ls=ls + ) + for i, m in enumerate(global_co2_flux.member.values[::-1]): + c = colors[i] + iam_emissions.sel(member=m).sel(time=slice(2005, None)).plot( + ax=ax, color='white', lw=3 + ) + iam_emissions.sel(member=m).sel(time=slice(2005, None)).plot( + ax=ax, color=c, lw=2, ls=ls + ) + + # fig aestetics + ax.axhline(y=0, ls=':', c='gray') + ax.set_ylabel('Compatible emissions [PgC/yr]') + ax.set_xlabel('Time [year]') + ax.set_title('Compatible emissions') + return ax @is_xarray([0, 1]) @@ -161,8 +203,8 @@ def potential_pco2(t_insitu, pco2_insitu): pco2_potential (xarray object): potential pCO2 with depth Reference: - Sarmiento, Jorge Louis, and Nicolas Gruber. Ocean Biogeochemical Dynamics. - Princeton, NJ: Princeton Univ. Press, 2006., p.421, eq. (10:3:1) + * Sarmiento, Jorge Louis, and Nicolas Gruber. Ocean Biogeochemical Dynamics. + Princeton, NJ: Princeton Univ. Press, 2006., p.421, eq. (10:3:1) Examples: >>> from esmtools.carbon import potential_pco2 @@ -180,79 +222,89 @@ def potential_pco2(t_insitu, pco2_insitu): @is_xarray(0) -def spco2_sensitivity(ds): - """Compute sensitivity of surface pCO2 to changes in driver variables. +def schmidt(t): + """Computes the dimensionless Schmidt number. + + .. note:: + The polynomials used are for SST ranges between 0 and 30C and a salinity of 35. Args: - ds (xr.Dataset): containing cmorized variables: - * spco2 [uatm]: ocean pCO2 at surface - * talkos[mmol m-3]: Alkalinity at ocean surface - * dissicos[mmol m-3]: DIC at ocean surface - * tos [C] : temperature at ocean surface - * sos [psu] : salinity at ocean surface + t (xarray object): SST (degC) - Returns: - sensitivity (xr.Dataset): + Return: + Sc (xarray object): Schmidt number (dimensionless) References: - * Lovenduski, Nicole S., Nicolas Gruber, Scott C. Doney, and Ivan D. Lima. - “Enhanced CO2 Outgassing in the Southern Ocean from a Positive Phase of - the Southern Annular Mode.” Global Biogeochemical Cycles 21, no. 2 - (2007). https://doi.org/10/fpv2wt. - * Sarmiento, Jorge Louis, and Nicolas Gruber. Ocean Biogeochemical Dynamics. - Princeton, NJ: Princeton Univ. Press, 2006., p.421, eq. (10:3:1) + Sarmiento and Gruber (2006). Ocean Biogeochemical Dynamics. 
Table 3.3.1 Examples: - >>> from esmtools.carbon import spco2_sensitivity + >>> from esmtools.carbon import schmidt >>> import numpy as np >>> import xarray as xr - >>> tos = xr.DataArray(np.random.randint(15, 30, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']).rename('tos') - >>> sos = xr.DataArray(np.random.randint(30, 35, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']).rename('sos') - >>> spco2 = xr.DataArray(np.random.randint(350, 400, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']).rename('spco2') - >>> dissicos = xr.DataArray(np.random.randint(1900, 2100, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']).rename('dissicos') - >>> talkos = xr.DataArray(np.random.randint(2100, 2300, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']).rename('talkos') - >>> ds = xr.merge([tos, sos, spco2, dissicos, talkos]) - >>> sensitivity = spco2_sensitivity(ds) + >>> t = xr.DataArray(np.random.randint(10, 25, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']) + >>> Sc = schmidt(t) """ - def _check_variables(ds): - requiredVars = ['spco2', 'tos', 'sos', 'talkos', 'dissicos'] - if not all(i in ds.data_vars for i in requiredVars): - missingVars = [i for i in requiredVars if i not in ds.data_vars] - raise ValueError( - f"""Missing variables needed for calculation: - {missingVars}""" - ) - - _check_variables(ds) - # Sensitivities are based on the time-mean for each field. This computes - # sensitivities at each grid cell. - # TODO: Add keyword for sliding mean, as in N year chunks of time to - # account for trends. - DIC = ds['dissicos'] - ALK = ds['talkos'] - SALT = ds['sos'] - pCO2 = ds['spco2'] + def calc_schmidt(t): + c = [2073.1, 125.62, 3.6276, 0.043219] + Sc = c[0] - c[1] * t + c[2] * (t ** 2) - c[3] * (t ** 3) + return Sc - buffer_factor = dict() - buffer_factor['ALK'] = -(ALK ** 2) / ((2 * DIC - ALK) * (ALK - DIC)) - buffer_factor['DIC'] = (3 * ALK * DIC - 2 * DIC ** 2) / ( - (2 * DIC - ALK) * (ALK - DIC) + Sc = xr.apply_ufunc( + calc_schmidt, t, input_core_dims=[[]], vectorize=True, dask='allowed' ) + return Sc + + +@is_xarray(0) +def spco2_decomposition(ds_terms, detrend=True, order=1, deseasonalize=False): + """Decompose oceanic surface pco2 in a first order Taylor-expansion. + + Args: + ds_terms (xr.Dataset): containing cmorized variables: + spco2 [ppm]: ocean pCO2 at surface + talkos[mmol m-3]: Alkalinity at ocean surface + dissicos[mmol m-3]: DIC at ocean surface + tos [C] : temperature at ocean surface + sos [psu] : salinity at ocean surface + detrend (bool): If True, detrend when generating anomalies. Default to + a linear (order 1) regression. + order (int): If detrend is true, the order polynomial to remove from + your time series. + deseasonalize (bool): If True, deseasonalize when generating anomalies. + + Return: + terms_in_pCO2_units (xr.Dataset): terms of spco2 decomposition + + References: + * Lovenduski, Nicole S., Nicolas Gruber, Scott C. Doney, and Ivan D. Lima. + “Enhanced CO2 Outgassing in the Southern Ocean from a Positive Phase of + the Southern Annular Mode.” Global Biogeochemical Cycles 21, no. 2 + (2007). https://doi.org/10/fpv2wt. 
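In illustrative notation, the first-order Taylor expansion evaluated here approximates the contribution of each driver :math:`x \in \{\mathrm{tos}, \mathrm{sos}, \mathrm{dissicos}, \mathrm{talkos}\}` as

.. math::

    \mathrm{term}_{x} \approx \overline{\left(\frac{\partial p\mathrm{CO}_2}{\partial x}\right)} \; x'

i.e. the time-mean sensitivity from ``spco2_sensitivity`` multiplied by the (optionally detrended and deseasonalized) anomaly :math:`x'`, which the function body below computes as ``pco2_sensitivity.mean('time') * ds_terms_anomaly``.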
+ + """ + pco2_sensitivity = spco2_sensitivity(ds_terms) + + if detrend and not order: + raise KeyError( + """Please provide the order of polynomial you would like + to remove from your time series.""" + ) + elif detrend: + ds_terms_anomaly = rm_poly(ds_terms, order=order, dim='time') + else: + warnings.warn('Your data are not being detrended.') + ds_terms_anomaly = ds_terms - ds_terms.mean('time') + + if deseasonalize: + clim = ds_terms_anomaly.groupby('time.month').mean('time') + ds_terms_anomaly = ds_terms_anomaly.groupby('time.month') - clim + else: + warnings.warn('Your data are not being deseasonalized.') - # Compute sensitivities - sensitivity = dict() - sensitivity['tos'] = 0.0423 - sensitivity['sos'] = 1 / SALT - sensitivity['talkos'] = (1 / ALK) * buffer_factor['ALK'] - sensitivity['dissicos'] = (1 / DIC) * buffer_factor['DIC'] - sensitivity = xr.Dataset(sensitivity) * pCO2 - return sensitivity + terms_in_pCO2_units = pco2_sensitivity.mean('time') * ds_terms_anomaly + return terms_in_pCO2_units # TODO: adapt for CESM and MPI output. @@ -299,6 +351,7 @@ def spco2_decomposition_index( * Brady, Riley X., et al. "On the role of climate modes in modulating the air–sea CO 2 fluxes in eastern boundary upwelling systems." Biogeosciences 16.2 (2019): 329-346. + """ def regression_against_index(ds, index, psig=None): @@ -356,184 +409,129 @@ def regression_against_index(ds, index, psig=None): @is_xarray(0) -def spco2_decomposition(ds_terms, detrend=True, order=1, deseasonalize=False): - """Decompose oceanic surface pco2 in a first order Taylor-expansion. +def spco2_sensitivity(ds): + """Compute sensitivity of surface pCO2 to changes in driver variables. Args: - ds_terms (xr.Dataset): containing cmorized variables: - spco2 [ppm]: ocean pCO2 at surface - talkos[mmol m-3]: Alkalinity at ocean surface - dissicos[mmol m-3]: DIC at ocean surface - tos [C] : temperature at ocean surface - sos [psu] : salinity at ocean surface - detrend (bool): If True, detrend when generating anomalies. Default to - a linear (order 1) regression. - order (int): If detrend is true, the order polynomial to remove from - your time series. - deseasonalize (bool): If True, deseasonalize when generating anomalies. + ds (xr.Dataset): containing cmorized variables: + * spco2 [uatm]: ocean pCO2 at surface + * talkos[mmol m-3]: Alkalinity at ocean surface + * dissicos[mmol m-3]: DIC at ocean surface + * tos [C] : temperature at ocean surface + * sos [psu] : salinity at ocean surface - Return: - terms_in_pCO2_units (xr.Dataset): terms of spco2 decomposition + Returns: + sensitivity (xr.Dataset): References: * Lovenduski, Nicole S., Nicolas Gruber, Scott C. Doney, and Ivan D. Lima. “Enhanced CO2 Outgassing in the Southern Ocean from a Positive Phase of the Southern Annular Mode.” Global Biogeochemical Cycles 21, no. 2 (2007). https://doi.org/10/fpv2wt. + * Sarmiento, Jorge Louis, and Nicolas Gruber. Ocean Biogeochemical Dynamics. + Princeton, NJ: Princeton Univ. Press, 2006., p.421, eq. 
(10:3:1) + Examples: + >>> from esmtools.carbon import spco2_sensitivity + >>> import numpy as np + >>> import xarray as xr + >>> tos = xr.DataArray(np.random.randint(15, 30, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('tos') + >>> sos = xr.DataArray(np.random.randint(30, 35, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('sos') + >>> spco2 = xr.DataArray(np.random.randint(350, 400, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('spco2') + >>> dissicos = xr.DataArray(np.random.randint(1900, 2100, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('dissicos') + >>> talkos = xr.DataArray(np.random.randint(2100, 2300, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('talkos') + >>> ds = xr.merge([tos, sos, spco2, dissicos, talkos]) + >>> sensitivity = spco2_sensitivity(ds) """ - pco2_sensitivity = spco2_sensitivity(ds_terms) - - if detrend and not order: - raise KeyError( - """Please provide the order of polynomial you would like - to remove from your time series.""" - ) - elif detrend: - ds_terms_anomaly = rm_poly(ds_terms, order=order, dim='time') - else: - warnings.warn('Your data are not being detrended.') - ds_terms_anomaly = ds_terms - ds_terms.mean('time') - - if deseasonalize: - clim = ds_terms_anomaly.groupby('time.month').mean('time') - ds_terms_anomaly = ds_terms_anomaly.groupby('time.month') - clim - else: - warnings.warn('Your data are not being deseasonalized.') - - terms_in_pCO2_units = pco2_sensitivity.mean('time') * ds_terms_anomaly - return terms_in_pCO2_units - - -def calculate_compatible_emissions(global_co2_flux, co2atm_forcing): - """Calculate compatible emissions. - - Args: - global_co2_flux (xr.object): global co2_flux in PgC/yr. - co2atm_forcing (xr.object): prescribed atm. CO2 forcing in ppm. - - Returns: - xr.object: compatible emissions in PgC/yr. - - Reference: - * Jones, Chris, Eddy Robertson, Vivek Arora, Pierre Friedlingstein, Elena - Shevliakova, Laurent Bopp, Victor Brovkin, et al. “Twenty-First-Century - Compatible CO2 Emissions and Airborne Fraction Simulated by CMIP5 Earth - System Models under Four Representative Concentration Pathways.” - Journal of Climate 26, no. 13 (February 1, 2013): 4398–4413. - https://doi.org/10/f44bbn. - """ - compatible_emissions = co2atm_forcing.diff('time') * 2.12 - global_co2_flux - compatible_emissions.name = 'compatible_emissions' - return compatible_emissions + def _check_variables(ds): + requiredVars = ['spco2', 'tos', 'sos', 'talkos', 'dissicos'] + if not all(i in ds.data_vars for i in requiredVars): + missingVars = [i for i in requiredVars if i not in ds.data_vars] + raise ValueError( + f"""Missing variables needed for calculation: + {missingVars}""" + ) + _check_variables(ds) + # Sensitivities are based on the time-mean for each field. This computes + # sensitivities at each grid cell. + # TODO: Add keyword for sliding mean, as in N year chunks of time to + # account for trends. + DIC = ds['dissicos'] + ALK = ds['talkos'] + SALT = ds['sos'] + pCO2 = ds['spco2'] -def get_iam_emissions(): - """Download IAM emissions from PIK website. + buffer_factor = dict() + buffer_factor['ALK'] = -(ALK ** 2) / ((2 * DIC - ALK) * (ALK - DIC)) + buffer_factor['DIC'] = (3 * ALK * DIC - 2 * DIC ** 2) / ( + (2 * DIC - ALK) * (ALK - DIC) + ) - Returns: - iam_emissions (xr.object): emissions from the IAMs in PgC/yr. 
+ # Compute sensitivities + sensitivity = dict() + sensitivity['tos'] = 0.0423 + sensitivity['sos'] = 1 / SALT + sensitivity['talkos'] = (1 / ALK) * buffer_factor['ALK'] + sensitivity['dissicos'] = (1 / DIC) * buffer_factor['DIC'] + sensitivity = xr.Dataset(sensitivity) * pCO2 + return sensitivity - """ - ds = [] - member = ['rcp26', 'rcp45', 'rcp85'] - for r in member: - if r == 'rcp26': - r = 'rcp3pd' - r = r.upper() - link = f'http://www.pik-potsdam.de/~mmalte/rcps/data/{r}_EMISSIONS.xls' - e = pd.read_excel(link, sheet_name=f'{r}_EMISSIONS', skiprows=35, header=2) - e = e.set_index(e.columns[0]) - e.index.name = 'Year' - ds.append(e[['FossilCO2', 'OtherCO2']].to_xarray()) - ds = xr.concat(ds, 'member') - ds = ds.sel(Year=slice(1850, 2100)).rename({'Year': 'time'}) - ds['member'] = member - ds['IAM_emissions'] = ds['FossilCO2'] + ds['OtherCO2'] - return ds['IAM_emissions'] +@is_xarray(0) +def temp_decomp_takahashi(ds, time_dim='time', temperature='tos', pco2='spco2'): + """Decompose surface pCO2 into thermal and non-thermal components. -def plot_compatible_emissions( - compatible_emissions, global_co2_flux, iam_emissions=None, ax=None -): - """Plot combatible emissions. + .. note:: + This expects cmorized variable names. You can pass keywords to change that + or rename your variables accordingly. Args: - compatible_emissions (xr.object): compatible_emissions in PgC/yr from - `calculate_compatible_emissions`. - global_co2_flux (xr.object): Global CO2 flux in PgC/yr. - iam_emissions (xr.object): (optional) Emissions from the IAMs in PgC/yr. - Defaults to None. - ax (plt.ax): (optional) matplotlib axis to plot onto. Defaults to None. - - Returns: - ax (plt.ax): matplotlib axis. + ds (xarray.Dataset): Contains two variables: + * `tos` (sea surface temperature in degC) + * `spco2` (surface pCO2 in uatm) - """ - """Plot combatible emissions plot. + Return: + decomp (xr.Dataset): Decomposed thermal and non-thermal components. References: - * Jones, Chris, Eddy Robertson, Vivek Arora, Pierre Friedlingstein, Elena - Shevliakova, Laurent Bopp, Victor Brovkin, et al. “Twenty-First-Century - Compatible CO2 Emissions and Airborne Fraction Simulated by CMIP5 Earth - System Models under Four Representative Concentration Pathways.” - Journal of Climate 26, no. 13 (February 1, 2013): 4398–4413. - https://doi.org/10/f44bbn. Fig. 5a - * IPCC AR5 Fig. 6.25 - """ - if ax is None: - fig, ax = plt.subplots(figsize=(10, 6)) - # hist - alpha = 0.1 - c = 'gray' - compatible_emissions.isel(member=0).sel( - time=slice(None, 2005) - ).to_dataframe().unstack()['compatible_emissions'].plot( - ax=ax, legend=False, color=c, alpha=alpha - ) - compatible_emissions.isel(member=0).sel(time=slice(None, 2005)).mean( - 'initialization' - ).plot(ax=ax, color='w', lw=3) - compatible_emissions.isel(member=0).sel(time=slice(None, 2005)).mean( - 'initialization' - ).plot(ax=ax, color=c, lw=2) - # rcps - colors = ['royalblue', 'orange', 'red'][::-1] - for i, m in enumerate(global_co2_flux.member.values[::-1]): - c = colors[i] - compatible_emissions.sel(member=m).sel( - time=slice(2005, None) - ).to_dataframe().unstack()['compatible_emissions'].plot( - ax=ax, legend=False, color=c, alpha=alpha - ) - compatible_emissions.sel(member=m).sel(time=slice(2005, None)).mean( - 'initialization' - ).plot(ax=ax, color='w', lw=3) - compatible_emissions.sel(member=m).sel(time=slice(2005, None)).mean( - 'initialization' - ).plot(ax=ax, color=c, lw=2) + * Takahashi, Taro, Stewart C. 
Sutherland, Colm Sweeney, Alain Poisson, Nicolas + Metzl, Bronte Tilbrook, Nicolas Bates, et al. “Global Sea–Air CO2 Flux + Based on Climatological Surface Ocean PCO2, and Seasonal Biological and + Temperature Effects.” Deep Sea Research Part II: Topical Studies in + Oceanography, The Southern Ocean I: Climatic Changes in the Cycle of + Carbon in the Southern Ocean, 49, no. 9 (January 1,2002): 1601–22. + https://doi.org/10/dmk4f2. - if iam_emissions is not None: - ls = (0, (5, 5)) - iam_emissions.isel(member=0).sel(time=slice(None, 2005)).plot( - ax=ax, color='white', lw=3 - ) - iam_emissions.isel(member=0).sel(time=slice(None, 2005)).plot( - ax=ax, color='gray', lw=2, ls=ls - ) - for i, m in enumerate(global_co2_flux.member.values[::-1]): - c = colors[i] - iam_emissions.sel(member=m).sel(time=slice(2005, None)).plot( - ax=ax, color='white', lw=3 - ) - iam_emissions.sel(member=m).sel(time=slice(2005, None)).plot( - ax=ax, color=c, lw=2, ls=ls - ) + Examples: + >>> from esmtools.carbon import temp_decomp_takahashi + >>> import numpy as np + >>> import xarray as xr + >>> t = xr.DataArray(np.random.randint(10, 25, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('tos') + >>> pco2 = xr.DataArray(np.random.randint(350, 400, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('spco2') + >>> ds = xr.merge([t, pco2]) + >>> decomp = temp_decomp_takahashi(ds) + """ + if temperature not in ds.data_vars: + raise ValueError(f'{temperature} is not a variable in your dataset.') + if pco2 not in ds.data_vars: + raise ValueError(f'{pco2} is not a variable in your dataset.') - # fig aestetics - ax.axhline(y=0, ls=':', c='gray') - ax.set_ylabel('Compatible emissions [PgC/yr]') - ax.set_xlabel('Time [year]') - ax.set_title('Compatible emissions') - return ax + fac = 0.0432 + tos_mean = ds[temperature].mean(time_dim) + tos_diff = ds[temperature] - tos_mean + thermal = (ds[pco2].mean(time_dim) * (np.exp(tos_diff * fac))).rename('thermal') + non_thermal = (ds[pco2] * (np.exp(tos_diff * -fac))).rename('non_thermal') + decomp = xr.merge([thermal, non_thermal]) + decomp.attrs[ + 'description' + ] = 'Takahashi decomposition of pCO2 into thermal and non-thermal components.' + return decomp diff --git a/esmtools/physics.py b/esmtools/physics.py index 9d58a93..94aa2c8 100644 --- a/esmtools/physics.py +++ b/esmtools/physics.py @@ -1,35 +1,29 @@ -""" -Objects dealing with physical conversions. - -Winds and Wind Stress ---------------------- -`stress_to_speed` : Convert ocean wind stress to U10 wind speed on the native ocean grid -""" import numpy as np import xarray as xr def stress_to_speed(x, y): - """This converts ocean wind stress to wind speed at 10m over the ocean so - that one can use the native ocean grid rather than trying to interpolate between - ocean and atmosphere grids. + """Convert ocean wind stress to wind speed at 10 m over the ocean. - This is based on the conversion used in Lovenduski et al. (2007), which is related - to the CESM coupler conversrion: - http://www.cesm.ucar.edu/models/ccsm3.0/cpl6/users_guide/node20.html + This expects that tau is in dyn/cm2. - tau/rho = 0.0027U + 0.000142U2 + 0.0000764U3 + .. math:: + \\tau = 0.0027 * U + 0.000142 * U2 + 0.0000764 * U3 - Input - ----- - x : DataArray of taux or taux**2 - y : DataArray of tauy or tauy**2 + .. note:: + This is useful for looking at wind speed on the native ocean grid, rather than + trying to interpolate between atmospheric and oceanic grids. + + This is based on the conversion used in Lovenduski et al. 
(2007), which is related + to the CESM coupler conversion: + http://www.cesm.ucar.edu/models/ccsm3.0/cpl6/users_guide/node20.html - This script expects that tau is in dyn/cm2. + Args: + x (xr.DataArray): ``TAUX`` or ``TAUX2``. + y (xr.DataArray): ``TAUY`` or ``TAUY2``. - Return - ------ - U10 : DataArray of the approximated wind speed. + Returns: + U10 (xr.DataArray): Approximated U10 wind speed. """ tau = ( (np.sqrt(x ** 2 + y ** 2)) / 1.2 * 100 ** 2 / 1e5 @@ -42,5 +36,5 @@ def stress_to_speed(x, y): i = np.imag(r) good = np.where(i == 0) U10[t] = np.real(r[good]) - U10 = xr.DataArray(U10, dims=["time"], coords=[tau["time"]]) + U10 = xr.DataArray(U10, dims=['time'], coords=[tau['time']]) return U10 diff --git a/esmtools/spatial.py b/esmtools/spatial.py index 9965524..faf596d 100644 --- a/esmtools/spatial.py +++ b/esmtools/spatial.py @@ -3,6 +3,45 @@ from .checks import is_xarray +@is_xarray(0) +def extract_region(ds, xgrid, ygrid, coords, lat_dim='lat', lon_dim='lon'): + """Extract a subset of some larger spatial data. + + Args: + ds (xarray object): Data to be subset. + xgrid (array_like): Meshgrid of longitudes. + ygrid (array_like): Meshgrid of latitudes. + coords (1-D array or list): [x0, x1, y0, y1] pertaining to the corners of the + box to extract. + lat_dim (optional str): Latitude dimension name (default 'lat'). + lon_dim (optional str): Longitude dimension name (default 'lon') + + Returns: + subset_data (xarray object): Data subset to domain of interest. + + Examples: + >>> import esmtools as et + >>> import numpy as np + >>> import xarray as xr + >>> x = np.linspace(0, 360, 37) + >>> y = np.linspace(-90, 90, 19) + >>> xx, yy = np.meshgrid(x, y) + >>> ds = xr.DataArray(np.random.rand(19, 37), dims=['lat', 'lon']) + >>> ds['latitude'] = (('lat', 'lon'), yy) + >>> ds['longitude'] = (('lat', 'lon'), xx) + >>> coords = [0, 30, -20, 20] + >>> subset = et.spatial.extract_region(ds, xx, yy, coords) + """ + # Extract the corners of the box. + x0, x1, y0, y1 = coords + # Find indices on meshgrid for the box corners. + a, c = find_indices(xgrid, ygrid, x0, y0) + b, d = find_indices(xgrid, ygrid, x1, y1) + # Slice is not inclusive, so need to add one to end. + subset_data = ds.isel({lat_dim: slice(a, b + 1), lon_dim: slice(c, d + 1)}) + return subset_data + + def find_indices(xgrid, ygrid, xpoint, ypoint): """Returns the i, j index for a latitude/longitude point on a grid. @@ -42,42 +81,3 @@ def find_indices(xgrid, ygrid, xpoint, ypoint): min_ix = np.nanargmin(reduced_grid) i, j = np.unravel_index(min_ix, reduced_grid.shape) return i, j - - -@is_xarray(0) -def extract_region(ds, xgrid, ygrid, coords, lat_dim="lat", lon_dim="lon"): - """Extract a subset of some larger spatial data. - - Args: - ds (xarray object): Data to be subset. - xgrid (array_like): Meshgrid of longitudes. - ygrid (array_like): Meshgrid of latitudes. - coords (1-D array or list): [x0, x1, y0, y1] pertaining to the corners of the - box to extract. - lat_dim (optional str): Latitude dimension name (default 'lat'). - lon_dim (optional str): Longitude dimension name (default 'lon') - - Returns: - subset_data (xarray object): Data subset to domain of interest. 
- - Examples: - >>> import esmtools as et - >>> import numpy as np - >>> import xarray as xr - >>> x = np.linspace(0, 360, 37) - >>> y = np.linspace(-90, 90, 19) - >>> xx, yy = np.meshgrid(x, y) - >>> ds = xr.DataArray(np.random.rand(19, 37), dims=['lat', 'lon']) - >>> ds['latitude'] = (('lat', 'lon'), yy) - >>> ds['longitude'] = (('lat', 'lon'), xx) - >>> coords = [0, 30, -20, 20] - >>> subset = et.spatial.extract_region(ds, xx, yy, coords) - """ - # Extract the corners of the box. - x0, x1, y0, y1 = coords - # Find indices on meshgrid for the box corners. - a, c = find_indices(xgrid, ygrid, x0, y0) - b, d = find_indices(xgrid, ygrid, x1, y1) - # Slice is not inclusive, so need to add one to end. - subset_data = ds.isel({lat_dim: slice(a, b + 1), lon_dim: slice(c, d + 1)}) - return subset_data diff --git a/esmtools/stats.py b/esmtools/stats.py index a40bff8..5f0d861 100644 --- a/esmtools/stats.py +++ b/esmtools/stats.py @@ -8,128 +8,7 @@ from .checks import has_dims, has_missing, is_xarray from .timeutils import TimeUtilAccessor -from .utils import get_dims, match_nans - - -@is_xarray(0) -def standardize(ds, dim="time"): - """Standardize Dataset/DataArray - - .. math:: - \\frac{x - \\mu_{x}}{\\sigma_{x}} - - Args: - ds (xarray object): Dataset or DataArray with variable(s) to standardize. - dim (optional str): Which dimension to standardize over (default 'time'). - - Returns: - stdized (xarray object): Standardized variable(s). - """ - stdized = (ds - ds.mean(dim)) / ds.std(dim) - return stdized - - -@is_xarray(0) -def nanmean(ds, dim="time"): - """Compute mean NaNs and suppress warning from numpy""" - if "time" in ds.dims: - mask = ds.isnull().isel(time=0) - else: - mask = ds.isnull() - ds = ds.fillna(0).mean(dim) - ds = ds.where(~mask) - return ds - - -@is_xarray(0) -def cos_weight(da, lat_coord="lat", lon_coord="lon", one_dimensional=True): - """ - Area-weights data on a regular (e.g. 360x180) grid that does not come with - cell areas. Uses cosine-weighting. - - Parameters - ---------- - da : DataArray with longitude and latitude - lat_coord : str (optional) - Name of latitude coordinate - lon_coord : str (optional) - Name of longitude coordinate - one_dimensional : bool (optional) - If true, assumes that lat and lon are 1D (i.e. not a meshgrid) - - Returns - ------- - aw_da : Area-weighted DataArray - - Examples - -------- - import esmtools as et - da_aw = et.stats.reg_aw(SST) - """ - non_spatial = [i for i in get_dims(da) if i not in [lat_coord, lon_coord]] - filter_dict = {} - while len(non_spatial) > 0: - filter_dict.update({non_spatial[0]: 0}) - non_spatial.pop(0) - if one_dimensional: - lon, lat = np.meshgrid(da[lon_coord], da[lat_coord]) - else: - lat = da[lat_coord] - # NaN out land to not go into area-weighting - lat = lat.astype("float") - nan_mask = np.asarray(da.isel(filter_dict).isnull()) - lat[nan_mask] = np.nan - cos_lat = np.cos(np.deg2rad(lat)) - aw_da = (da * cos_lat).sum(lat_coord).sum(lon_coord) / np.nansum(cos_lat) - return aw_da - - -@is_xarray(0) -def area_weight(da, area_coord="area"): - """ - Returns an area-weighted time series from the input xarray dataarray. This - automatically figures out spatial dimensions vs. other dimensions. I.e., - this function works for just a single realization or for many realizations. - See `reg_aw` if you have a regular (e.g. 360x180) grid that does not - contain cell areas. - - It also looks like xarray is implementing a feature like this. 
- - NOTE: This currently does not support datasets (of multiple variables) - The user can alleviate this by using the .apply() function. - - Parameters - ---------- - da : DataArray - area_coord : str (defaults to 'area') - Name of area coordinate if different from 'area' - - Returns - ------- - aw_da : Area-weighted DataArray - """ - area = da[area_coord] - # Mask the area coordinate in case you've got a bunch of NaNs, e.g. a mask - # or land. - dimlist = get_dims(da) - # Pull out coordinates that aren't spatial. Time, ensemble members, etc. - non_spatial = [i for i in dimlist if i not in get_dims(area)] - filter_dict = {} - while len(non_spatial) > 0: - filter_dict.update({non_spatial[0]: 0}) - non_spatial.pop(0) - masked_area = area.where(da.isel(filter_dict).notnull()) - # Compute area-weighting. - dimlist = get_dims(masked_area) - aw_da = da * masked_area - # Sum over arbitrary number of dimensions. - while len(dimlist) > 0: - print(f"Summing over {dimlist[0]}") - aw_da = aw_da.sum(dimlist[0]) - dimlist.pop(0) - # Finish area-weighting by dividing by sum of area coordinate. - aw_da = aw_da / masked_area.sum() - return aw_da +from .utils import match_nans def _check_y_not_independent_variable(y, dim): @@ -146,8 +25,8 @@ def _check_y_not_independent_variable(y, dim): """ if isinstance(y, xr.DataArray) and (y.name == dim): raise ValueError( - f"Dependent variable y should not be the same as the dim {dim} being " - "applied over. Change your y variable to x." + f'Dependent variable y should not be the same as the dim {dim} being ' + 'applied over. Change your y variable to x.' ) @@ -203,12 +82,12 @@ def _handle_nans(x, y, nan_policy): # Only support 1D, since we are doing `~np.isnan()` indexing for 'omit'/'drop'. if (x.ndim > 1) or (y.ndim > 1): raise ValueError( - f"x and y must be 1-dimensional. Got {x.ndim} for x and {y.ndim} for y." + f'x and y must be 1-dimensional. Got {x.ndim} for x and {y.ndim} for y.' ) - if nan_policy in ["none", "propagate"]: + if nan_policy in ['none', 'propagate']: return x, y - elif nan_policy == "raise": + elif nan_policy == 'raise': if has_missing(x) or has_missing(y): raise ValueError( "Input data contains NaNs. Consider changing `nan_policy` to 'none' " @@ -216,7 +95,7 @@ def _handle_nans(x, y, nan_policy): ) else: return x, y - elif nan_policy in ["omit", "drop"]: + elif nan_policy in ['omit', 'drop']: if has_missing(x) or has_missing(y): x_mod, y_mod = match_nans(x, y) # The above function pairwise-matches nans. Now we remove them so that we @@ -256,11 +135,11 @@ def _polyfit(x, y, order, nan_policy): """ x_mod, y_mod = _handle_nans(x, y, nan_policy) # This catches cases where a given grid cell is full of nans, like in land masking. - if (nan_policy in ["omit", "drop"]) and (x_mod.size == 0): + if (nan_policy in ['omit', 'drop']) and (x_mod.size == 0): return np.full(len(x), np.nan) # This catches cases where there is missing values in the independent axis, which # breaks polyfit. - elif (nan_policy in ["none", "propagate"]) and (has_missing(x_mod)): + elif (nan_policy in ['none', 'propagate']) and (has_missing(x_mod)): return np.full(len(x), np.nan) else: # fit to data without nans, return applied to original independent axis. @@ -279,14 +158,115 @@ def _warn_if_not_converted_to_original_time_units(x): if x.timeutils.is_temporal: if x.timeutils.freq is None: warnings.warn( - "Datetime frequency not detected. Slope and std. errors will be " - "in original units per day (e.g., degC per day). 
Multiply by " - "e.g., 365.25 to convert to original units per year." + 'Datetime frequency not detected. Slope and std. errors will be ' + 'in original units per day (e.g., degC per day). Multiply by ' + 'e.g., 365.25 to convert to original units per year.' ) +@is_xarray(0) +def ACF(ds, dim='time', nlags=None): + """Compute the ACF of a time series to a specific lag. + + Args: + ds (xarray object): dataset/dataarray containing the time series. + dim (str): dimension to apply ACF over. + nlags (optional int): number of lags to compute ACF over. If None, + compute for length of `dim` on `ds`. + + Returns: + Dataset or DataArray with ACF results. + + Notes: + This is preferred over ACF functions from MATLAB/scipy, since it doesn't + use FFT methods. + """ + # Drop variables that don't have requested dimension, so this can be + # applied over the full dataset. + if isinstance(ds, xr.Dataset): + dropVars = [i for i in ds if dim not in ds[i].dims] + ds = ds.drop(dropVars) + + # Loop through every step in `dim` + if nlags is None: + nlags = ds[dim].size + + acf = [] + # The 2 factor accounts for fact that time series reduces in size for + # each lag. + for i in range(nlags - 2): + res = autocorr(ds, lag=i, dim=dim) + acf.append(res) + acf = xr.concat(acf, dim=dim) + return acf + + +@is_xarray(0) +def autocorr(ds, lag=1, dim='time', return_p=False): + """Calculated lagged correlation of a xr.Dataset. + + Args: + ds (xarray object): Dataset to compute autocorrelation with. + lag (int, optional): Lag to compute autocorrelation at. Defaults to 1. + dim (str, optional): Dimension to compute autocorrelation over. + Default to 'time'. + return_p (bool, optional): If True, return just the correlation coefficient. + If False, return both the correlation coefficient and p value. + + Returns: + r (xarray object): Pearson correlation coefficient. + p (xarray object): P value, if ``return_p`` is True. + """ + return st.autocorr(ds, lag=lag, dim=dim, return_p=return_p) + + +@is_xarray(0) +def corr(x, y, dim='time', lead=0, return_p=False): + """Computes the Pearson product-moment coefficient of linear correlation. + + This version calculates the effective degrees of freedom, accounting + for autocorrelation within each time series that could fluff the + significance of the correlation. + + Args: + x, y (xarray.DataArray): Time series being correlated. + dim (str, optional): Dimension to calculate correlation over. Defaults to + 'time'. + lead (int, optional): If lead > 0, ``x`` leads ``y`` by that many time steps. + If lead < 0, ``x`` lags ``y`` by that many time steps. Defaults to 0. + return_p (bool, optional). If True, return both ``r`` and ``p``. Otherwise, + just return ``r``. Defaults to False. + + Returns: + r (xarray object): Pearson correlation coefficient. + p (xarray object): P value, if ``return_p`` is True. + + References: + * Wilks, Daniel S. Statistical methods in the atmospheric sciences. + Vol. 100. Academic press, 2011. + * Lovenduski, Nicole S., and Nicolas Gruber. "Impact of the Southern + Annular Mode on Southern Ocean circulation and biology." Geophysical + Research Letters 32.11 (2005). + * Brady, R. X., Lovenduski, N. S., Alexander, M. A., Jacox, M., and + Gruber, N.: On the role of climate modes in modulating the air-sea CO2 + fluxes in Eastern Boundary Upwelling Systems, Biogeosciences Discuss., + https://doi.org/10.5194/bg-2018-415, 2019. + + """ + # Broadcasts a time series to the same coordinates/size as the grid. 
If they + # are both grids, this function does nothing and isn't expensive. + x, y = xr.broadcast(x, y) + + # Negative lead should have y lead x. + if lead < 0: + lead = np.abs(lead) + return st.corr(y, x, dim=dim, lag=lead, return_p=return_p) + else: + return st.corr(x, y, dim=dim, lag=lead, return_p=return_p) + + @is_xarray([0, 1]) -def linear_slope(x, y, dim="time", nan_policy="none"): +def linear_slope(x, y, dim='time', nan_policy='none'): """Returns the linear slope with y regressed onto x. .. note:: @@ -314,8 +294,8 @@ def linear_slope(x, y, dim="time", nan_policy="none"): Returns: xarray object: Slopes computed through a least-squares linear regression. """ - has_dims(x, dim, "predictor (x)") - has_dims(y, dim, "predictand (y)") + has_dims(x, dim, 'predictor (x)') + has_dims(y, dim, 'predictand (y)') _check_y_not_independent_variable(y, dim) x, slope_factor = _convert_time_and_return_slope_factor(x, dim) @@ -323,11 +303,11 @@ def _linear_slope(x, y, nan_policy): x, y = _handle_nans(x, y, nan_policy) # This catches cases where a given grid cell is full of nans, like in # land masking. - if (nan_policy in ["omit", "drop"]) and (x.size == 0): + if (nan_policy in ['omit', 'drop']) and (x.size == 0): return np.asarray([np.nan]) # This catches cases where there is missing values in the independent axis, # which breaks polyfit. - elif (nan_policy in ["none", "propagate"]) and (has_missing(x)): + elif (nan_policy in ['none', 'propagate']) and (has_missing(x)): return np.asarray([np.nan]) else: return np.polyfit(x, y, 1)[0] @@ -338,16 +318,16 @@ def _linear_slope(x, y, nan_policy): y, nan_policy, vectorize=True, - dask="parallelized", + dask='parallelized', input_core_dims=[[dim], [dim], []], - output_dtypes=["float64"], + output_dtypes=['float64'], ) _warn_if_not_converted_to_original_time_units(x) return slopes * slope_factor @is_xarray([0, 1]) -def linregress(x, y, dim="time", nan_policy="none"): +def linregress(x, y, dim='time', nan_policy='none'): """Vectorized applciation of ``scipy.stats.linregress``. .. note:: @@ -378,8 +358,8 @@ def linregress(x, y, dim="time", nan_policy="none"): "parameter". """ - has_dims(x, dim, "predictor (x)") - has_dims(y, dim, "predictand (y)") + has_dims(x, dim, 'predictor (x)') + has_dims(y, dim, 'predictand (y)') _check_y_not_independent_variable(y, dim) x, slope_factor = _convert_time_and_return_slope_factor(x, dim) @@ -387,7 +367,7 @@ def _linregress(x, y, slope_factor, nan_policy): x, y = _handle_nans(x, y, nan_policy) # This catches cases where a given grid cell is full of nans, like in # land masking. 
- if (nan_policy in ["omit", "drop"]) and (x.size == 0): + if (nan_policy in ['omit', 'drop']) and (x.size == 0): return np.full(5, np.nan) else: m, b, r, p, e = scipy.stats.linregress(x, y) @@ -404,21 +384,41 @@ def _linregress(x, y, slope_factor, nan_policy): slope_factor, nan_policy, vectorize=True, - dask="parallelized", + dask='parallelized', input_core_dims=[[dim], [dim], [], []], - output_core_dims=[["parameter"]], - output_dtypes=["float64"], - output_sizes={"parameter": 5}, + output_core_dims=[['parameter']], + output_dtypes=['float64'], + output_sizes={'parameter': 5}, ) results = results.assign_coords( - parameter=["slope", "intercept", "rvalue", "pvalue", "stderr"] + parameter=['slope', 'intercept', 'rvalue', 'pvalue', 'stderr'] ) _warn_if_not_converted_to_original_time_units(x) return results @is_xarray(0) -def polyfit(x, y, order, dim="time", nan_policy="none"): +def nanmean(ds, dim='time'): + """Compute mean of data with NaNs and suppress warning from numpy. + + Args: + ds (xarray object): Dataset to compute mean over. + dim (str, optional): Dimension to compute mean over. + + Returns + xarray object: Reduced by ``dim`` via mean operation. + """ + if 'time' in ds.dims: + mask = ds.isnull().isel(time=0) + else: + mask = ds.isnull() + ds = ds.fillna(0).mean(dim) + ds = ds.where(~mask) + return ds + + +@is_xarray(0) +def polyfit(x, y, order, dim='time', nan_policy='none'): """Returns the fitted polynomial line of ``y`` regressed onto ``x``. .. note:: @@ -444,8 +444,8 @@ def polyfit(x, y, order, dim="time", nan_policy="none"): xarray object: The polynomial fit for ``y`` regressed onto ``x``. Has the same dimensions as ``y``. """ - has_dims(x, dim, "predictor (x)") - has_dims(y, dim, "predictand (y)") + has_dims(x, dim, 'predictor (x)') + has_dims(y, dim, 'predictand (y)') _check_y_not_independent_variable(y, dim) x, _ = _convert_time_and_return_slope_factor(x, dim) @@ -456,15 +456,15 @@ def polyfit(x, y, order, dim="time", nan_policy="none"): order, nan_policy, vectorize=True, - dask="parallelized", + dask='parallelized', input_core_dims=[[dim], [dim], [], []], output_core_dims=[[dim]], - output_dtypes=["float"], + output_dtypes=['float'], ) @is_xarray(0) -def rm_poly(x, y, order, dim="time", nan_policy="none"): +def rm_poly(x, y, order, dim='time', nan_policy='none'): """Removes a polynomial fit from ``y`` regressed onto ``x``. Args: @@ -485,8 +485,8 @@ def rm_poly(x, y, order, dim="time", nan_policy="none"): Returns: xarray object: ``y`` with polynomial fit of order ``order`` removed. """ - has_dims(x, dim, "predictor (x)") - has_dims(y, dim, "predictand (y)") + has_dims(x, dim, 'predictor (x)') + has_dims(y, dim, 'predictand (y)') _check_y_not_independent_variable(y, dim) x, _ = _convert_time_and_return_slope_factor(x, dim) @@ -501,15 +501,15 @@ def _rm_poly(x, y, order, nan_policy): order, nan_policy, vectorize=True, - dask="parallelized", + dask='parallelized', input_core_dims=[[dim], [dim], [], []], output_core_dims=[[dim]], - output_dtypes=["float64"], + output_dtypes=['float64'], ) @is_xarray(0) -def rm_trend(x, y, dim="time", nan_policy="none"): +def rm_trend(x, y, dim='time', nan_policy='none'): """Removes a linear fit from ``y`` regressed onto ``x``. Args: @@ -533,110 +533,18 @@ def rm_trend(x, y, dim="time", nan_policy="none"): @is_xarray(0) -def autocorr(ds, lag=1, dim="time", return_p=False): - """ - Calculated lagged correlation of a xr.Dataset. 
- Parameters - ---------- - ds : xarray dataset/dataarray - lag : int (default 1) - number of time steps to lag correlate. - dim : str (default 'time') - name of time dimension/dimension to autocorrelate over - return_p : boolean (default False) - if false, return just the correlation coefficient. - if true, return both the correlation coefficient and p-value. - Returns - ------- - r : Pearson correlation coefficient - p : (if return_p True) p-value - """ - return st.autocorr(ds, lag=lag, dim=dim, return_p=return_p) - +def standardize(ds, dim='time'): + """Standardize Dataset/DataArray -@is_xarray(0) -def ACF(ds, dim="time", nlags=None): - """ - Compute the ACF of a time series to a specific lag. + .. math:: + \\frac{x - \\mu_{x}}{\\sigma_{x}} Args: - ds (xarray object): dataset/dataarray containing the time series. - dim (str): dimension to apply ACF over. - nlags (optional int): number of lags to compute ACF over. If None, - compute for length of `dim` on `ds`. + ds (xarray object): Dataset or DataArray with variable(s) to standardize. + dim (optional str): Which dimension to standardize over (default 'time'). Returns: - Dataset or DataArray with ACF results. - - Notes: - This is preferred over ACF functions from MATLAB/scipy, since it doesn't - use FFT methods. - """ - # Drop variables that don't have requested dimension, so this can be - # applied over the full dataset. - if isinstance(ds, xr.Dataset): - dropVars = [i for i in ds if dim not in ds[i].dims] - ds = ds.drop(dropVars) - - # Loop through every step in `dim` - if nlags is None: - nlags = ds[dim].size - - acf = [] - # The 2 factor accounts for fact that time series reduces in size for - # each lag. - for i in range(nlags - 2): - res = autocorr(ds, lag=i, dim=dim) - acf.append(res) - acf = xr.concat(acf, dim=dim) - return acf - - -@is_xarray(0) -def corr(x, y, dim="time", lead=0, return_p=False): - """ - Computes the Pearson product-momment coefficient of linear correlation. - - This version calculates the effective degrees of freedom, accounting - for autocorrelation within each time series that could fluff the - significance of the correlation. - - Parameters - ---------- - x, y : xarray DataArray - time series being correlated (can be multi-dimensional) - dim : str (default 'time') - Correlation dimension - lead : int (default 0) - If lead > 0, x leads y by that many time steps. - If lead < 0, x lags y by that many time steps. - return_p : boolean (default False) - If true, return both r and p - - Returns - ------- - r : correlation coefficient - p : p-value accounting for autocorrelation (if return_p True) - - References (for dealing with autocorrelation): - ---------- - 1. Wilks, Daniel S. Statistical methods in the atmospheric sciences. - Vol. 100. Academic press, 2011. - 2. Lovenduski, Nicole S., and Nicolas Gruber. "Impact of the Southern - Annular Mode on Southern Ocean circulation and biology." Geophysical - Research Letters 32.11 (2005). - 3. Brady, R. X., Lovenduski, N. S., Alexander, M. A., Jacox, M., and - Gruber, N.: On the role of climate modes in modulating the air-sea CO2 - fluxes in Eastern Boundary Upwelling Systems, Biogeosciences Discuss., - https://doi.org/10.5194/bg-2018-415, in review, 2018. + stdized (xarray object): Standardized variable(s). """ - # Broadcasts a time series to the same coordinates/size as the grid. If they - # are both grids, this function does nothing and isn't expensive. - x, y = xr.broadcast(x, y) - - # Negative lead should have y lead x. 
- if lead < 0: - lead = np.abs(lead) - return st.corr(y, x, dim=dim, lag=lead, return_p=return_p) - else: - return st.corr(x, y, dim=dim, lag=lead, return_p=return_p) + stdized = (ds - ds.mean(dim)) / ds.std(dim) + return stdized diff --git a/esmtools/testing.py b/esmtools/testing.py index 8e3cb13..b745bd8 100644 --- a/esmtools/testing.py +++ b/esmtools/testing.py @@ -6,24 +6,7 @@ from .checks import is_xarray from .constants import MULTIPLE_TESTS -__all__ = ["ttest_ind_from_stats", "multipletests"] - - -def ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2): - """Parallelize scipy.stats.ttest_ind_from_stats.""" - return xr.apply_ufunc( - tti_from_stats, - mean1, - std1, - nobs1, - mean2, - std2, - nobs2, - input_core_dims=[[], [], [], [], [], []], - output_core_dims=[[], []], - vectorize=True, - dask="parallelized", - ) +__all__ = ['ttest_ind_from_stats', 'multipletests'] @is_xarray(0) @@ -55,12 +38,14 @@ def multipletests(p, alpha=0.05, method=None, **multipletests_kwargs): pvals_corrected (xr.object): p-values corrected for multiple tests Example: - reject, xpvals_corrected = xr_multipletest(p, method='fdr_bh') + >>> from esmtools.testing import multipletests + >>> reject, xpvals_corrected = multipletests(p, method='fdr_bh') + """ if method is None: raise ValueError( f"Please indicate a method using the 'method=...' keyword. " - f"Select from {MULTIPLE_TESTS}" + f'Select from {MULTIPLE_TESTS}' ) elif method not in MULTIPLE_TESTS: raise ValueError( @@ -81,4 +66,35 @@ def multipletests(p, alpha=0.05, method=None, **multipletests_kwargs): p_stacked[mask], alpha=alpha, method=method, **multipletests_kwargs ) - return reject.unstack("s"), pvals_corrected.unstack("s") + return reject.unstack('s'), pvals_corrected.unstack('s') + + +def ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var=True): + """Parallelize scipy.stats.ttest_ind_from_stats and make dask-compatible. + + Args: + mean1, mean2 (array_like): The means of samples 1 and 2. + std1, std2 (array_like): The standard deviations of samples 1 and 2. + nobs1, nobs2 (array_like): The number of observations for samples 1 and 2. + equal_var (bool, optional): If True (default), perform a standard independent + 2 sample test that assumes equal population variances. If False, perform + Welch's t-test, which does not assume equal population variance. + + Returns: + statistic (float or array): The calculated t-statistics. + pvalue (float or array): The two-tailed p-value. + """ + return xr.apply_ufunc( + tti_from_stats, + mean1, + std1, + nobs1, + mean2, + std2, + nobs2, + equal_var, + input_core_dims=[[], [], [], [], [], [], []], + output_core_dims=[[], []], + vectorize=True, + dask='parallelized', + ) diff --git a/esmtools/timeutils.py b/esmtools/timeutils.py index 7320d4f..c5106e1 100644 --- a/esmtools/timeutils.py +++ b/esmtools/timeutils.py @@ -7,8 +7,11 @@ from .constants import DAYS_PER_MONTH, DAYS_PER_YEAR -@xr.register_dataarray_accessor("timeutils") +@xr.register_dataarray_accessor('timeutils') class TimeUtilAccessor: + """Accessor for cftime, datetime, and timeoffset indexed DataArrays. 
This aids + in modifying time axes for slope correction and for converting to numeric time.""" + def __init__(self, xarray_obj): self._obj = xarray_obj self._is_temporal = contains_datetime_like_objects(self._obj) @@ -57,61 +60,61 @@ def slope_factor(self): def return_numeric_time(self): """Returns numeric time.""" if self.is_datetime_like: - x = (self._obj - np.datetime64("1990-01-01")) / np.timedelta64(1, "D") + x = (self._obj - np.datetime64('1990-01-01')) / np.timedelta64(1, 'D') return x elif self.is_cftime_like: x = cftime.date2num( - self._obj, "days since 1990-01-01", calendar=self.calendar + self._obj, 'days since 1990-01-01', calendar=self.calendar ) x = xr.DataArray(x, dims=self._obj.dims, coords=self._obj.coords) return x else: - raise ValueError("DataArray is not a time array of datetimes or cftimes.") + raise ValueError('DataArray is not a time array of datetimes or cftimes.') @staticmethod def construct_quarterly_aliases(): - quarters = ["Q", "BQ", "QS", "BQS"] + quarters = ['Q', 'BQ', 'QS', 'BQS'] for month in [ - "JAN", - "FEB", - "MAR", - "APR", - "MAY", - "JUN", - "JUL", - "AUG", - "SEP", - "OCT", - "NOV", - "DEC", + 'JAN', + 'FEB', + 'MAR', + 'APR', + 'MAY', + 'JUN', + 'JUL', + 'AUG', + 'SEP', + 'OCT', + 'NOV', + 'DEC', ]: - quarters.append(f"Q-{month}") - quarters.append(f"BQ-{month}") - quarters.append(f"BQS-{month}") - quarters.append(f"QS-{month}") + quarters.append(f'Q-{month}') + quarters.append(f'BQ-{month}') + quarters.append(f'BQS-{month}') + quarters.append(f'QS-{month}') return quarters @staticmethod def construct_annual_aliases(): - years = ["A", "Y", "BA", "BY", "AS", "YS", "BAS", "BYS", "Q"] + years = ['A', 'Y', 'BA', 'BY', 'AS', 'YS', 'BAS', 'BYS', 'Q'] for month in [ - "JAN", - "FEB", - "MAR", - "APR", - "MAY", - "JUN", - "JUL", - "AUG", - "SEP", - "OCT", - "NOV", - "DEC", + 'JAN', + 'FEB', + 'MAR', + 'APR', + 'MAY', + 'JUN', + 'JUL', + 'AUG', + 'SEP', + 'OCT', + 'NOV', + 'DEC', ]: - years.append(f"A-{month}") - years.append(f"BA-{month}") - years.append(f"BAS-{month}") - years.append(f"AS-{month}") + years.append(f'A-{month}') + years.append(f'BA-{month}') + years.append(f'BAS-{month}') + years.append(f'AS-{month}') return years def construct_slope_factors(self): @@ -121,21 +124,21 @@ def construct_slope_factors(self): quarters = self.construct_quarterly_aliases() quarters = {k: self.annual_factor / 4 for k in quarters} - months = ("M", "BM", "CBM", "MS", "BMS", "CBMS") + months = ('M', 'BM', 'CBM', 'MS', 'BMS', 'CBMS') months = {k: self.annual_factor / 12 for k in months} - semimonths = {k: 15 for k in ("SM", "SMS")} + semimonths = {k: 15 for k in ('SM', 'SMS')} - weeks = ("W", "W-SUN", "W-MON", "W-TUE", "W-WED", "W-THU", "W-FRI", "W-SAT") + weeks = ('W', 'W-SUN', 'W-MON', 'W-TUE', 'W-WED', 'W-THU', 'W-FRI', 'W-SAT') weeks = {k: 7 for k in weeks} - days = {k: 1 for k in ("B", "C", "D")} - hours = {k: 1 / 24 for k in ("BH", "H")} - mins = {k: 1 / (24 * 60) for k in ("T", "min")} - secs = {k: 1 / (24 * 60 * 60) for k in ("S")} - millisecs = {k: 1 / (24 * 60 * 60 * 1e3) for k in ("ms", "L")} - microsecs = {k: 1 / (24 * 60 * 60 * 1e6) for k in ("U", "us")} - nanosecs = {k: 1 / (24 * 60 * 60 * 1e9) for k in ("N")} + days = {k: 1 for k in ('B', 'C', 'D')} + hours = {k: 1 / 24 for k in ('BH', 'H')} + mins = {k: 1 / (24 * 60) for k in ('T', 'min')} + secs = {k: 1 / (24 * 60 * 60) for k in ('S')} + millisecs = {k: 1 / (24 * 60 * 60 * 1e3) for k in ('ms', 'L')} + microsecs = {k: 1 / (24 * 60 * 60 * 1e6) for k in ('U', 'us')} + nanosecs = {k: 1 / (24 * 60 * 60 * 
1e9) for k in ('N')} DATETIME_FACTOR = {} for d in ( @@ -178,13 +181,13 @@ def get_calendar(dates): Raises: ValueError: If inferred calendar is not in our list of supported calendars. """ - if np.asarray(dates).dtype == "datetime64[ns]": - return "proleptic_gregorian" + if np.asarray(dates).dtype == 'datetime64[ns]': + return 'proleptic_gregorian' else: return np.asarray(dates).ravel()[0].calendar -def get_days_per_month(time, calendar="standard"): +def get_days_per_month(time, calendar='standard'): """Return an array of days per month corresponding to a given calendar. Args: @@ -197,7 +200,7 @@ def get_days_per_month(time, calendar="standard"): Raises: ValueError: If input time index is not a CFTimeIndex or DatetimeIndex. """ - is_time_index(time, "time index") + is_time_index(time, 'time index') month_length = np.zeros(len(time), dtype=np.int) cal_days = DAYS_PER_MONTH[calendar] @@ -217,15 +220,15 @@ def is_time_index(xobj, kind): i.e. through .to_index() """ xtype = type(xobj).__name__ - if xtype not in ["CFTimeIndex", "DatetimeIndex"]: + if xtype not in ['CFTimeIndex', 'DatetimeIndex']: raise ValueError( - f"Your {kind} object must be either an xr.CFTimeIndex or " - f"pd.DatetimeIndex." + f'Your {kind} object must be either an xr.CFTimeIndex or ' + f'pd.DatetimeIndex.' ) return True -def leap_year(year, calendar="standard"): +def leap_year(year, calendar='standard'): """Determine if year is a leap year. Args: @@ -236,18 +239,18 @@ def leap_year(year, calendar="standard"): bool: True if year is a leap year. """ leap = False - if (calendar in ["standard", "gregorian", "proleptic_gregorian", "julian"]) and ( + if (calendar in ['standard', 'gregorian', 'proleptic_gregorian', 'julian']) and ( year % 4 == 0 ): leap = True if ( - (calendar == "proleptic_gregorian") + (calendar == 'proleptic_gregorian') and (year % 100 == 0) and (year % 400 != 0) ): leap = False elif ( - (calendar in ["standard", "gregorian"]) + (calendar in ['standard', 'gregorian']) and (year % 100 == 0) and (year % 400 != 0) and (year < 1583) @@ -264,9 +267,9 @@ def infer_freq(index): if isinstance(index, (DataArray, pd.Series)): dtype = np.asarray(index).dtype - if dtype == "datetime64[ns]": + if dtype == 'datetime64[ns]': index = pd.DatetimeIndex(index.values) - elif dtype == "timedelta64[ns]": + elif dtype == 'timedelta64[ns]': index = pd.TimedeltaIndex(index.values) else: index = xr.CFTimeIndex(index.values) @@ -285,18 +288,18 @@ def infer_freq(index): _ONE_HOUR = 60 * _ONE_MINUTE _ONE_DAY = 24 * _ONE_HOUR _MONTH_ABBREVIATIONS = { - 1: "JAN", - 2: "FEB", - 3: "MAR", - 4: "APR", - 5: "MAY", - 6: "JUN", - 7: "JUL", - 8: "AUG", - 9: "SEP", - 10: "OCT", - 11: "NOV", - 12: "DEC", + 1: 'JAN', + 2: 'FEB', + 3: 'MAR', + 4: 'APR', + 5: 'MAY', + 6: 'JUN', + 7: 'JUL', + 8: 'AUG', + 9: 'SEP', + 10: 'OCT', + 11: 'NOV', + 12: 'DEC', } @@ -306,7 +309,7 @@ def __init__(self, index): self.values = index.asi8 if len(index) < 3: - raise ValueError("Need at least 3 dates to infer frequency") + raise ValueError('Need at least 3 dates to infer frequency') self.is_monotonic = ( self.index.is_monotonic_decreasing or self.index.is_monotonic_increasing @@ -330,22 +333,22 @@ def get_freq(self): return None if _is_multiple(delta, _ONE_HOUR): - return _maybe_add_count("H", delta / _ONE_HOUR) + return _maybe_add_count('H', delta / _ONE_HOUR) elif _is_multiple(delta, _ONE_MINUTE): - return _maybe_add_count("T", delta / _ONE_MINUTE) + return _maybe_add_count('T', delta / _ONE_MINUTE) elif _is_multiple(delta, _ONE_SECOND): - return 
_maybe_add_count("S", delta / _ONE_SECOND) + return _maybe_add_count('S', delta / _ONE_SECOND) elif _is_multiple(delta, _ONE_MILLI): - return _maybe_add_count("L", delta / _ONE_MILLI) + return _maybe_add_count('L', delta / _ONE_MILLI) else: - return _maybe_add_count("U", delta / _ONE_MICRO) + return _maybe_add_count('U', delta / _ONE_MICRO) def _infer_daily_rule(self): annual_rule = self._get_annual_rule() if annual_rule: nyears = self.year_deltas[0] month = _MONTH_ABBREVIATIONS[self.index[0].month] - alias = f"{annual_rule}-{month}" + alias = f'{annual_rule}-{month}' return _maybe_add_count(alias, nyears) quartely_rule = self._get_quartely_rule() @@ -353,7 +356,7 @@ def _infer_daily_rule(self): nquarters = self.month_deltas[0] / 3 mod_dict = {0: 12, 2: 11, 1: 10} month = _MONTH_ABBREVIATIONS[mod_dict[self.index[0].month % 3]] - alias = f"{quartely_rule}-{month}" + alias = f'{quartely_rule}-{month}' return _maybe_add_count(alias, nquarters) monthly_rule = self._get_monthly_rule() @@ -363,7 +366,7 @@ def _infer_daily_rule(self): if len(self.deltas) == 1: # Daily as there is no "Weekly" offsets with CFTime days = self.deltas[0] / _ONE_DAY - return _maybe_add_count("D", days) + return _maybe_add_count('D', days) # CFTime has no business freq and no "week of month" (WOM) return None @@ -375,7 +378,7 @@ def _get_annual_rule(self): if len(np.unique(self.index.month)) > 1: return None - return {"cs": "AS", "ce": "A"}.get(month_anchor_check(self.index)) + return {'cs': 'AS', 'ce': 'A'}.get(month_anchor_check(self.index)) def _get_quartely_rule(self): if len(self.month_deltas) > 1: @@ -384,13 +387,13 @@ def _get_quartely_rule(self): if not self.month_deltas[0] % 3 == 0: return None - return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index)) + return {'cs': 'QS', 'ce': 'Q'}.get(month_anchor_check(self.index)) def _get_monthly_rule(self): if len(self.month_deltas) > 1: return None - return {"cs": "MS", "ce": "M"}.get(month_anchor_check(self.index)) + return {'cs': 'MS', 'ce': 'M'}.get(month_anchor_check(self.index)) @property def deltas(self): @@ -424,7 +427,7 @@ def _maybe_add_count(base: str, count: float): if count != 1: assert count == int(count) count = int(count) - return f"{count}{base}" + return f'{count}{base}' else: return base @@ -452,9 +455,9 @@ def month_anchor_check(dates): break if calendar_end: - return "ce" + return 'ce' elif calendar_start: - return "cs" + return 'cs' else: return None diff --git a/esmtools/utils.py b/esmtools/utils.py index 5d0cc8a..0db3b6a 100644 --- a/esmtools/utils.py +++ b/esmtools/utils.py @@ -3,15 +3,6 @@ from .checks import has_missing -def get_dims(da): - """ - Simple function to retrieve dimensions from a given dataset/datarray. - Currently returns as a list, but can add keyword to select tuple or - list if desired for any reason. - """ - return list(da.dims) - - def match_nans(x, y): """Performs pairwise matching of nans between ``x`` and ``y``. @@ -28,9 +19,9 @@ def match_nans(x, y): x, y = x.copy(), y.copy() idx = np.logical_or(np.isnan(x), np.isnan(y)) # NaNs cannot be added to `int` arrays. - if x.dtype == "int": - x = x.astype("float") - if y.dtype == "int": - y = y.astype("float") + if x.dtype == 'int': + x = x.astype('float') + if y.dtype == 'int': + y = y.astype('float') x[idx], y[idx] = np.nan, np.nan return x, y