diff --git a/esmtools/carbon.py b/esmtools/carbon.py
index d1659bc..08fa27a 100644
--- a/esmtools/carbon.py
+++ b/esmtools/carbon.py
@@ -10,6 +10,30 @@ from .stats import linregress, nanmean, rm_poly
+def calculate_compatible_emissions(global_co2_flux, co2atm_forcing):
+    """Calculate compatible emissions.
+
+    Args:
+        global_co2_flux (xr.object): global co2_flux in PgC/yr.
+        co2atm_forcing (xr.object): prescribed atm. CO2 forcing in ppm.
+
+    Returns:
+        xr.object: compatible emissions in PgC/yr.
+
+    Reference:
+        * Jones, Chris, Eddy Robertson, Vivek Arora, Pierre Friedlingstein, Elena
+          Shevliakova, Laurent Bopp, Victor Brovkin, et al. “Twenty-First-Century
+          Compatible CO2 Emissions and Airborne Fraction Simulated by CMIP5 Earth
+          System Models under Four Representative Concentration Pathways.”
+          Journal of Climate 26, no. 13 (February 1, 2013): 4398–4413.
+          https://doi.org/10/f44bbn.
+
+    """
+    compatible_emissions = co2atm_forcing.diff('time') * 2.12 - global_co2_flux
+    compatible_emissions.name = 'compatible_emissions'
+    return compatible_emissions
+
+
 @is_xarray([0, 1])
 def co2_sol(t, s):
     """Compute CO2 solubility per the equation used in CESM.
@@ -57,93 +81,111 @@ def sol_calc(t, s):
     return ff
 
 
-@is_xarray(0)
-def schmidt(t):
-    """Computes the dimensionless Schmidt number.
-
-    .. note::
-        The polynomials used are for SST ranges between 0 and 30C and a salinity of 35.
-
-    Args:
-        t (xarray object): SST (degC)
-
-    Return:
-        Sc (xarray object): Schmidt number (dimensionless)
-
-    References:
-        Sarmiento and Gruber (2006). Ocean Biogeochemical Dynamics. Table 3.3.1
+def get_iam_emissions():
+    """Download IAM emissions from PIK website.
 
-    Examples:
-        >>> from esmtools.carbon import schmidt
-        >>> import numpy as np
-        >>> import xarray as xr
-        >>> t = xr.DataArray(np.random.randint(10, 25, size=(100, 10, 10)),
-                dims=['time', 'lat', 'lon'])
-        >>> Sc = schmidt(t)
+    Returns:
+        iam_emissions (xr.object): emissions from the IAMs in PgC/yr.
 
     """
+    ds = []
+    member = ['rcp26', 'rcp45', 'rcp85']
+    for r in member:
+        if r == 'rcp26':
+            r = 'rcp3pd'
+        r = r.upper()
+        link = f'http://www.pik-potsdam.de/~mmalte/rcps/data/{r}_EMISSIONS.xls'
+        e = pd.read_excel(link, sheet_name=f'{r}_EMISSIONS', skiprows=35, header=2)
+        e = e.set_index(e.columns[0])
+        e.index.name = 'Year'
+        ds.append(e[['FossilCO2', 'OtherCO2']].to_xarray())
+    ds = xr.concat(ds, 'member')
+    ds = ds.sel(Year=slice(1850, 2100)).rename({'Year': 'time'})
+    ds['member'] = member
+    ds['IAM_emissions'] = ds['FossilCO2'] + ds['OtherCO2']
+    return ds['IAM_emissions']
 
-    def calc_schmidt(t):
-        c = [2073.1, 125.62, 3.6276, 0.043219]
-        Sc = c[0] - c[1] * t + c[2] * (t ** 2) - c[3] * (t ** 3)
-        return Sc
-
-    Sc = xr.apply_ufunc(
-        calc_schmidt, t, input_core_dims=[[]], vectorize=True, dask='allowed'
-    )
-    return Sc
-
-
-@is_xarray(0)
-def temp_decomp_takahashi(ds, time_dim='time', temperature='tos', pco2='spco2'):
-    """Decompose surface pCO2 into thermal and non-thermal components.
-
-    .. note::
-        This expects cmorized variable names. You can pass keywords to change that
-        or rename your variables accordingly.
+def plot_compatible_emissions(
+    compatible_emissions, global_co2_flux, iam_emissions=None, ax=None
+):
+    """Plot compatible emissions.
 
     Args:
-        ds (xarray.Dataset): Contains two variables:
-            * `tos` (sea surface temperature in degC)
-            * `spco2` (surface pCO2 in uatm)
+        compatible_emissions (xr.object): compatible_emissions in PgC/yr from
+            `calculate_compatible_emissions`.
+        global_co2_flux (xr.object): Global CO2 flux in PgC/yr.
+ iam_emissions (xr.object): (optional) Emissions from the IAMs in PgC/yr. + Defaults to None. + ax (plt.ax): (optional) matplotlib axis to plot onto. Defaults to None. - Return: - decomp (xr.Dataset): Decomposed thermal and non-thermal components. + Returns: + ax (plt.ax): matplotlib axis. References: - Takahashi, Taro, Stewart C. Sutherland, Colm Sweeney, Alain Poisson, Nicolas - Metzl, Bronte Tilbrook, Nicolas Bates, et al. “Global Sea–Air CO2 Flux - Based on Climatological Surface Ocean PCO2, and Seasonal Biological and - Temperature Effects.” Deep Sea Research Part II: Topical Studies in - Oceanography, The Southern Ocean I: Climatic Changes in the Cycle of - Carbon in the Southern Ocean, 49, no. 9 (January 1,2002): 1601–22. - https://doi.org/10/dmk4f2. + * Jones, Chris, Eddy Robertson, Vivek Arora, Pierre Friedlingstein, Elena + Shevliakova, Laurent Bopp, Victor Brovkin, et al. “Twenty-First-Century + Compatible CO2 Emissions and Airborne Fraction Simulated by CMIP5 Earth + System Models under Four Representative Concentration Pathways.” + Journal of Climate 26, no. 13 (February 1, 2013): 4398–4413. + https://doi.org/10/f44bbn. Fig. 5a + * IPCC AR5 Fig. 6.25 - Examples: - >>> from esmtools.carbon import temp_decomp_takahashi - >>> import numpy as np - >>> import xarray as xr - >>> t = xr.DataArray(np.random.randint(10, 25, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']).rename('tos') - >>> pco2 = xr.DataArray(np.random.randint(350, 400, size=(100, 10, 10)), - dims=['time', 'lat', 'lon']).rename('spco2') - >>> ds = xr.merge([t, pco2]) - >>> decomp = temp_decomp_takahashi(ds) """ - if temperature not in ds.data_vars: - raise ValueError(f'{temperature} is not a variable in your dataset.') - if pco2 not in ds.data_vars: - raise ValueError(f'{pco2} is not a variable in your dataset.') + if ax is None: + fig, ax = plt.subplots(figsize=(10, 6)) + # hist + alpha = 0.1 + c = 'gray' + compatible_emissions.isel(member=0).sel( + time=slice(None, 2005) + ).to_dataframe().unstack()['compatible_emissions'].plot( + ax=ax, legend=False, color=c, alpha=alpha + ) + compatible_emissions.isel(member=0).sel(time=slice(None, 2005)).mean( + 'initialization' + ).plot(ax=ax, color='w', lw=3) + compatible_emissions.isel(member=0).sel(time=slice(None, 2005)).mean( + 'initialization' + ).plot(ax=ax, color=c, lw=2) + # rcps + colors = ['royalblue', 'orange', 'red'][::-1] + for i, m in enumerate(global_co2_flux.member.values[::-1]): + c = colors[i] + compatible_emissions.sel(member=m).sel( + time=slice(2005, None) + ).to_dataframe().unstack()['compatible_emissions'].plot( + ax=ax, legend=False, color=c, alpha=alpha + ) + compatible_emissions.sel(member=m).sel(time=slice(2005, None)).mean( + 'initialization' + ).plot(ax=ax, color='w', lw=3) + compatible_emissions.sel(member=m).sel(time=slice(2005, None)).mean( + 'initialization' + ).plot(ax=ax, color=c, lw=2) - fac = 0.0432 - tos_mean = ds[temperature].mean(time_dim) - tos_diff = ds[temperature] - tos_mean - thermal = (ds[pco2].mean(time_dim) * (np.exp(tos_diff * fac))).rename('thermal') - non_thermal = (ds[pco2] * (np.exp(tos_diff * -fac))).rename('non_thermal') - decomp = xr.merge([thermal, non_thermal]) - decomp.attrs[ - 'description' - ] = 'Takahashi decomposition of pCO2 into thermal and non-thermal components.' 
-    return decomp
+    if iam_emissions is not None:
+        ls = (0, (5, 5))
+        iam_emissions.isel(member=0).sel(time=slice(None, 2005)).plot(
+            ax=ax, color='white', lw=3
+        )
+        iam_emissions.isel(member=0).sel(time=slice(None, 2005)).plot(
+            ax=ax, color='gray', lw=2, ls=ls
+        )
+        for i, m in enumerate(global_co2_flux.member.values[::-1]):
+            c = colors[i]
+            iam_emissions.sel(member=m).sel(time=slice(2005, None)).plot(
+                ax=ax, color='white', lw=3
+            )
+            iam_emissions.sel(member=m).sel(time=slice(2005, None)).plot(
+                ax=ax, color=c, lw=2, ls=ls
+            )
+
+    # fig aesthetics
+    ax.axhline(y=0, ls=':', c='gray')
+    ax.set_ylabel('Compatible emissions [PgC/yr]')
+    ax.set_xlabel('Time [year]')
+    ax.set_title('Compatible emissions')
+    return ax
 
 
 @is_xarray([0, 1])
@@ -161,8 +203,8 @@ def potential_pco2(t_insitu, pco2_insitu):
         pco2_potential (xarray object): potential pCO2 with depth
 
     Reference:
-        Sarmiento, Jorge Louis, and Nicolas Gruber. Ocean Biogeochemical Dynamics.
-        Princeton, NJ: Princeton Univ. Press, 2006., p.421, eq. (10:3:1)
+        * Sarmiento, Jorge Louis, and Nicolas Gruber. Ocean Biogeochemical Dynamics.
+          Princeton, NJ: Princeton Univ. Press, 2006., p.421, eq. (10:3:1)
 
     Examples:
         >>> from esmtools.carbon import potential_pco2
@@ -180,79 +222,89 @@ def potential_pco2(t_insitu, pco2_insitu):
 
 
 @is_xarray(0)
-def spco2_sensitivity(ds):
-    """Compute sensitivity of surface pCO2 to changes in driver variables.
+def schmidt(t):
+    """Computes the dimensionless Schmidt number.
+
+    .. note::
+        The polynomials used are for SST ranges between 0 and 30C and a salinity of 35.
 
     Args:
-        ds (xr.Dataset): containing cmorized variables:
-            * spco2 [uatm]: ocean pCO2 at surface
-            * talkos[mmol m-3]: Alkalinity at ocean surface
-            * dissicos[mmol m-3]: DIC at ocean surface
-            * tos [C] : temperature at ocean surface
-            * sos [psu] : salinity at ocean surface
+        t (xarray object): SST (degC)
 
-    Returns:
-        sensitivity (xr.Dataset):
+    Return:
+        Sc (xarray object): Schmidt number (dimensionless)
 
     References:
-        * Lovenduski, Nicole S., Nicolas Gruber, Scott C. Doney, and Ivan D. Lima.
-          “Enhanced CO2 Outgassing in the Southern Ocean from a Positive Phase of
-          the Southern Annular Mode.” Global Biogeochemical Cycles 21, no. 2
-          (2007). https://doi.org/10/fpv2wt.
-        * Sarmiento, Jorge Louis, and Nicolas Gruber. Ocean Biogeochemical Dynamics.
-          Princeton, NJ: Princeton Univ. Press, 2006., p.421, eq. (10:3:1)
+        Sarmiento and Gruber (2006). Ocean Biogeochemical Dynamics. Table 3.3.1
 
     Examples:
-        >>> from esmtools.carbon import spco2_sensitivity
+        >>> from esmtools.carbon import schmidt
         >>> import numpy as np
        >>> import xarray as xr
-        >>> tos = xr.DataArray(np.random.randint(15, 30, size=(100, 10, 10)),
-                dims=['time', 'lat', 'lon']).rename('tos')
-        >>> sos = xr.DataArray(np.random.randint(30, 35, size=(100, 10, 10)),
-                dims=['time', 'lat', 'lon']).rename('sos')
-        >>> spco2 = xr.DataArray(np.random.randint(350, 400, size=(100, 10, 10)),
-                dims=['time', 'lat', 'lon']).rename('spco2')
-        >>> dissicos = xr.DataArray(np.random.randint(1900, 2100, size=(100, 10, 10)),
-                dims=['time', 'lat', 'lon']).rename('dissicos')
-        >>> talkos = xr.DataArray(np.random.randint(2100, 2300, size=(100, 10, 10)),
-                dims=['time', 'lat', 'lon']).rename('talkos')
-        >>> ds = xr.merge([tos, sos, spco2, dissicos, talkos])
-        >>> sensitivity = spco2_sensitivity(ds)
+        >>> t = xr.DataArray(np.random.randint(10, 25, size=(100, 10, 10)),
+                dims=['time', 'lat', 'lon'])
+        >>> Sc = schmidt(t)
 
     """
-    def _check_variables(ds):
-        requiredVars = ['spco2', 'tos', 'sos', 'talkos', 'dissicos']
-        if not all(i in ds.data_vars for i in requiredVars):
-            missingVars = [i for i in requiredVars if i not in ds.data_vars]
-            raise ValueError(
-                f"""Missing variables needed for calculation:
-                {missingVars}"""
-            )
-
-    _check_variables(ds)
-    # Sensitivities are based on the time-mean for each field. This computes
-    # sensitivities at each grid cell.
-    # TODO: Add keyword for sliding mean, as in N year chunks of time to
-    # account for trends.
-    DIC = ds['dissicos']
-    ALK = ds['talkos']
-    SALT = ds['sos']
-    pCO2 = ds['spco2']
+    def calc_schmidt(t):
+        c = [2073.1, 125.62, 3.6276, 0.043219]
+        Sc = c[0] - c[1] * t + c[2] * (t ** 2) - c[3] * (t ** 3)
+        return Sc
 
-    buffer_factor = dict()
-    buffer_factor['ALK'] = -(ALK ** 2) / ((2 * DIC - ALK) * (ALK - DIC))
-    buffer_factor['DIC'] = (3 * ALK * DIC - 2 * DIC ** 2) / (
-        (2 * DIC - ALK) * (ALK - DIC)
+    Sc = xr.apply_ufunc(
+        calc_schmidt, t, input_core_dims=[[]], vectorize=True, dask='allowed'
     )
+    return Sc
+
+
+@is_xarray(0)
+def spco2_decomposition(ds_terms, detrend=True, order=1, deseasonalize=False):
+    """Decompose oceanic surface pco2 in a first order Taylor-expansion.
+
+    Args:
+        ds_terms (xr.Dataset): containing cmorized variables:
+            spco2 [ppm]: ocean pCO2 at surface
+            talkos[mmol m-3]: Alkalinity at ocean surface
+            dissicos[mmol m-3]: DIC at ocean surface
+            tos [C] : temperature at ocean surface
+            sos [psu] : salinity at ocean surface
+        detrend (bool): If True, detrend when generating anomalies. Defaults to
+            a linear (order 1) regression.
+        order (int): If detrend is true, the order polynomial to remove from
+            your time series.
+        deseasonalize (bool): If True, deseasonalize when generating anomalies.
+
+    Return:
+        terms_in_pCO2_units (xr.Dataset): terms of spco2 decomposition
+
+    References:
+        * Lovenduski, Nicole S., Nicolas Gruber, Scott C. Doney, and Ivan D. Lima.
+          “Enhanced CO2 Outgassing in the Southern Ocean from a Positive Phase of
+          the Southern Annular Mode.” Global Biogeochemical Cycles 21, no. 2
+          (2007). https://doi.org/10/fpv2wt.
+ + """ + pco2_sensitivity = spco2_sensitivity(ds_terms) + + if detrend and not order: + raise KeyError( + """Please provide the order of polynomial you would like + to remove from your time series.""" + ) + elif detrend: + ds_terms_anomaly = rm_poly(ds_terms, order=order, dim='time') + else: + warnings.warn('Your data are not being detrended.') + ds_terms_anomaly = ds_terms - ds_terms.mean('time') + + if deseasonalize: + clim = ds_terms_anomaly.groupby('time.month').mean('time') + ds_terms_anomaly = ds_terms_anomaly.groupby('time.month') - clim + else: + warnings.warn('Your data are not being deseasonalized.') - # Compute sensitivities - sensitivity = dict() - sensitivity['tos'] = 0.0423 - sensitivity['sos'] = 1 / SALT - sensitivity['talkos'] = (1 / ALK) * buffer_factor['ALK'] - sensitivity['dissicos'] = (1 / DIC) * buffer_factor['DIC'] - sensitivity = xr.Dataset(sensitivity) * pCO2 - return sensitivity + terms_in_pCO2_units = pco2_sensitivity.mean('time') * ds_terms_anomaly + return terms_in_pCO2_units # TODO: adapt for CESM and MPI output. @@ -299,6 +351,7 @@ def spco2_decomposition_index( * Brady, Riley X., et al. "On the role of climate modes in modulating the air–sea CO 2 fluxes in eastern boundary upwelling systems." Biogeosciences 16.2 (2019): 329-346. + """ def regression_against_index(ds, index, psig=None): @@ -356,184 +409,129 @@ def regression_against_index(ds, index, psig=None): @is_xarray(0) -def spco2_decomposition(ds_terms, detrend=True, order=1, deseasonalize=False): - """Decompose oceanic surface pco2 in a first order Taylor-expansion. +def spco2_sensitivity(ds): + """Compute sensitivity of surface pCO2 to changes in driver variables. Args: - ds_terms (xr.Dataset): containing cmorized variables: - spco2 [ppm]: ocean pCO2 at surface - talkos[mmol m-3]: Alkalinity at ocean surface - dissicos[mmol m-3]: DIC at ocean surface - tos [C] : temperature at ocean surface - sos [psu] : salinity at ocean surface - detrend (bool): If True, detrend when generating anomalies. Default to - a linear (order 1) regression. - order (int): If detrend is true, the order polynomial to remove from - your time series. - deseasonalize (bool): If True, deseasonalize when generating anomalies. + ds (xr.Dataset): containing cmorized variables: + * spco2 [uatm]: ocean pCO2 at surface + * talkos[mmol m-3]: Alkalinity at ocean surface + * dissicos[mmol m-3]: DIC at ocean surface + * tos [C] : temperature at ocean surface + * sos [psu] : salinity at ocean surface - Return: - terms_in_pCO2_units (xr.Dataset): terms of spco2 decomposition + Returns: + sensitivity (xr.Dataset): References: * Lovenduski, Nicole S., Nicolas Gruber, Scott C. Doney, and Ivan D. Lima. “Enhanced CO2 Outgassing in the Southern Ocean from a Positive Phase of the Southern Annular Mode.” Global Biogeochemical Cycles 21, no. 2 (2007). https://doi.org/10/fpv2wt. + * Sarmiento, Jorge Louis, and Nicolas Gruber. Ocean Biogeochemical Dynamics. + Princeton, NJ: Princeton Univ. Press, 2006., p.421, eq. 
(10:3:1) + Examples: + >>> from esmtools.carbon import spco2_sensitivity + >>> import numpy as np + >>> import xarray as xr + >>> tos = xr.DataArray(np.random.randint(15, 30, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('tos') + >>> sos = xr.DataArray(np.random.randint(30, 35, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('sos') + >>> spco2 = xr.DataArray(np.random.randint(350, 400, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('spco2') + >>> dissicos = xr.DataArray(np.random.randint(1900, 2100, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('dissicos') + >>> talkos = xr.DataArray(np.random.randint(2100, 2300, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('talkos') + >>> ds = xr.merge([tos, sos, spco2, dissicos, talkos]) + >>> sensitivity = spco2_sensitivity(ds) """ - pco2_sensitivity = spco2_sensitivity(ds_terms) - - if detrend and not order: - raise KeyError( - """Please provide the order of polynomial you would like - to remove from your time series.""" - ) - elif detrend: - ds_terms_anomaly = rm_poly(ds_terms, order=order, dim='time') - else: - warnings.warn('Your data are not being detrended.') - ds_terms_anomaly = ds_terms - ds_terms.mean('time') - - if deseasonalize: - clim = ds_terms_anomaly.groupby('time.month').mean('time') - ds_terms_anomaly = ds_terms_anomaly.groupby('time.month') - clim - else: - warnings.warn('Your data are not being deseasonalized.') - - terms_in_pCO2_units = pco2_sensitivity.mean('time') * ds_terms_anomaly - return terms_in_pCO2_units - - -def calculate_compatible_emissions(global_co2_flux, co2atm_forcing): - """Calculate compatible emissions. - - Args: - global_co2_flux (xr.object): global co2_flux in PgC/yr. - co2atm_forcing (xr.object): prescribed atm. CO2 forcing in ppm. - - Returns: - xr.object: compatible emissions in PgC/yr. - - Reference: - * Jones, Chris, Eddy Robertson, Vivek Arora, Pierre Friedlingstein, Elena - Shevliakova, Laurent Bopp, Victor Brovkin, et al. “Twenty-First-Century - Compatible CO2 Emissions and Airborne Fraction Simulated by CMIP5 Earth - System Models under Four Representative Concentration Pathways.” - Journal of Climate 26, no. 13 (February 1, 2013): 4398–4413. - https://doi.org/10/f44bbn. - """ - compatible_emissions = co2atm_forcing.diff('time') * 2.12 - global_co2_flux - compatible_emissions.name = 'compatible_emissions' - return compatible_emissions + def _check_variables(ds): + requiredVars = ['spco2', 'tos', 'sos', 'talkos', 'dissicos'] + if not all(i in ds.data_vars for i in requiredVars): + missingVars = [i for i in requiredVars if i not in ds.data_vars] + raise ValueError( + f"""Missing variables needed for calculation: + {missingVars}""" + ) + _check_variables(ds) + # Sensitivities are based on the time-mean for each field. This computes + # sensitivities at each grid cell. + # TODO: Add keyword for sliding mean, as in N year chunks of time to + # account for trends. + DIC = ds['dissicos'] + ALK = ds['talkos'] + SALT = ds['sos'] + pCO2 = ds['spco2'] -def get_iam_emissions(): - """Download IAM emissions from PIK website. + buffer_factor = dict() + buffer_factor['ALK'] = -(ALK ** 2) / ((2 * DIC - ALK) * (ALK - DIC)) + buffer_factor['DIC'] = (3 * ALK * DIC - 2 * DIC ** 2) / ( + (2 * DIC - ALK) * (ALK - DIC) + ) - Returns: - iam_emissions (xr.object): emissions from the IAMs in PgC/yr. 
+ # Compute sensitivities + sensitivity = dict() + sensitivity['tos'] = 0.0423 + sensitivity['sos'] = 1 / SALT + sensitivity['talkos'] = (1 / ALK) * buffer_factor['ALK'] + sensitivity['dissicos'] = (1 / DIC) * buffer_factor['DIC'] + sensitivity = xr.Dataset(sensitivity) * pCO2 + return sensitivity - """ - ds = [] - member = ['rcp26', 'rcp45', 'rcp85'] - for r in member: - if r == 'rcp26': - r = 'rcp3pd' - r = r.upper() - link = f'http://www.pik-potsdam.de/~mmalte/rcps/data/{r}_EMISSIONS.xls' - e = pd.read_excel(link, sheet_name=f'{r}_EMISSIONS', skiprows=35, header=2) - e = e.set_index(e.columns[0]) - e.index.name = 'Year' - ds.append(e[['FossilCO2', 'OtherCO2']].to_xarray()) - ds = xr.concat(ds, 'member') - ds = ds.sel(Year=slice(1850, 2100)).rename({'Year': 'time'}) - ds['member'] = member - ds['IAM_emissions'] = ds['FossilCO2'] + ds['OtherCO2'] - return ds['IAM_emissions'] +@is_xarray(0) +def temp_decomp_takahashi(ds, time_dim='time', temperature='tos', pco2='spco2'): + """Decompose surface pCO2 into thermal and non-thermal components. -def plot_compatible_emissions( - compatible_emissions, global_co2_flux, iam_emissions=None, ax=None -): - """Plot combatible emissions. + .. note:: + This expects cmorized variable names. You can pass keywords to change that + or rename your variables accordingly. Args: - compatible_emissions (xr.object): compatible_emissions in PgC/yr from - `calculate_compatible_emissions`. - global_co2_flux (xr.object): Global CO2 flux in PgC/yr. - iam_emissions (xr.object): (optional) Emissions from the IAMs in PgC/yr. - Defaults to None. - ax (plt.ax): (optional) matplotlib axis to plot onto. Defaults to None. - - Returns: - ax (plt.ax): matplotlib axis. + ds (xarray.Dataset): Contains two variables: + * `tos` (sea surface temperature in degC) + * `spco2` (surface pCO2 in uatm) - """ - """Plot combatible emissions plot. + Return: + decomp (xr.Dataset): Decomposed thermal and non-thermal components. References: - * Jones, Chris, Eddy Robertson, Vivek Arora, Pierre Friedlingstein, Elena - Shevliakova, Laurent Bopp, Victor Brovkin, et al. “Twenty-First-Century - Compatible CO2 Emissions and Airborne Fraction Simulated by CMIP5 Earth - System Models under Four Representative Concentration Pathways.” - Journal of Climate 26, no. 13 (February 1, 2013): 4398–4413. - https://doi.org/10/f44bbn. Fig. 5a - * IPCC AR5 Fig. 6.25 - """ - if ax is None: - fig, ax = plt.subplots(figsize=(10, 6)) - # hist - alpha = 0.1 - c = 'gray' - compatible_emissions.isel(member=0).sel( - time=slice(None, 2005) - ).to_dataframe().unstack()['compatible_emissions'].plot( - ax=ax, legend=False, color=c, alpha=alpha - ) - compatible_emissions.isel(member=0).sel(time=slice(None, 2005)).mean( - 'initialization' - ).plot(ax=ax, color='w', lw=3) - compatible_emissions.isel(member=0).sel(time=slice(None, 2005)).mean( - 'initialization' - ).plot(ax=ax, color=c, lw=2) - # rcps - colors = ['royalblue', 'orange', 'red'][::-1] - for i, m in enumerate(global_co2_flux.member.values[::-1]): - c = colors[i] - compatible_emissions.sel(member=m).sel( - time=slice(2005, None) - ).to_dataframe().unstack()['compatible_emissions'].plot( - ax=ax, legend=False, color=c, alpha=alpha - ) - compatible_emissions.sel(member=m).sel(time=slice(2005, None)).mean( - 'initialization' - ).plot(ax=ax, color='w', lw=3) - compatible_emissions.sel(member=m).sel(time=slice(2005, None)).mean( - 'initialization' - ).plot(ax=ax, color=c, lw=2) + * Takahashi, Taro, Stewart C. 
Sutherland, Colm Sweeney, Alain Poisson, Nicolas + Metzl, Bronte Tilbrook, Nicolas Bates, et al. “Global Sea–Air CO2 Flux + Based on Climatological Surface Ocean PCO2, and Seasonal Biological and + Temperature Effects.” Deep Sea Research Part II: Topical Studies in + Oceanography, The Southern Ocean I: Climatic Changes in the Cycle of + Carbon in the Southern Ocean, 49, no. 9 (January 1,2002): 1601–22. + https://doi.org/10/dmk4f2. - if iam_emissions is not None: - ls = (0, (5, 5)) - iam_emissions.isel(member=0).sel(time=slice(None, 2005)).plot( - ax=ax, color='white', lw=3 - ) - iam_emissions.isel(member=0).sel(time=slice(None, 2005)).plot( - ax=ax, color='gray', lw=2, ls=ls - ) - for i, m in enumerate(global_co2_flux.member.values[::-1]): - c = colors[i] - iam_emissions.sel(member=m).sel(time=slice(2005, None)).plot( - ax=ax, color='white', lw=3 - ) - iam_emissions.sel(member=m).sel(time=slice(2005, None)).plot( - ax=ax, color=c, lw=2, ls=ls - ) + Examples: + >>> from esmtools.carbon import temp_decomp_takahashi + >>> import numpy as np + >>> import xarray as xr + >>> t = xr.DataArray(np.random.randint(10, 25, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('tos') + >>> pco2 = xr.DataArray(np.random.randint(350, 400, size=(100, 10, 10)), + dims=['time', 'lat', 'lon']).rename('spco2') + >>> ds = xr.merge([t, pco2]) + >>> decomp = temp_decomp_takahashi(ds) + """ + if temperature not in ds.data_vars: + raise ValueError(f'{temperature} is not a variable in your dataset.') + if pco2 not in ds.data_vars: + raise ValueError(f'{pco2} is not a variable in your dataset.') - # fig aestetics - ax.axhline(y=0, ls=':', c='gray') - ax.set_ylabel('Compatible emissions [PgC/yr]') - ax.set_xlabel('Time [year]') - ax.set_title('Compatible emissions') - return ax + fac = 0.0432 + tos_mean = ds[temperature].mean(time_dim) + tos_diff = ds[temperature] - tos_mean + thermal = (ds[pco2].mean(time_dim) * (np.exp(tos_diff * fac))).rename('thermal') + non_thermal = (ds[pco2] * (np.exp(tos_diff * -fac))).rename('non_thermal') + decomp = xr.merge([thermal, non_thermal]) + decomp.attrs[ + 'description' + ] = 'Takahashi decomposition of pCO2 into thermal and non-thermal components.' + return decomp diff --git a/esmtools/physics.py b/esmtools/physics.py index 9d58a93..94aa2c8 100644 --- a/esmtools/physics.py +++ b/esmtools/physics.py @@ -1,35 +1,29 @@ -""" -Objects dealing with physical conversions. - -Winds and Wind Stress ---------------------- -`stress_to_speed` : Convert ocean wind stress to U10 wind speed on the native ocean grid -""" import numpy as np import xarray as xr def stress_to_speed(x, y): - """This converts ocean wind stress to wind speed at 10m over the ocean so - that one can use the native ocean grid rather than trying to interpolate between - ocean and atmosphere grids. + """Convert ocean wind stress to wind speed at 10 m over the ocean. - This is based on the conversion used in Lovenduski et al. (2007), which is related - to the CESM coupler conversrion: - http://www.cesm.ucar.edu/models/ccsm3.0/cpl6/users_guide/node20.html + This expects that tau is in dyn/cm2. - tau/rho = 0.0027U + 0.000142U2 + 0.0000764U3 + .. math:: + \\tau = 0.0027 * U + 0.000142 * U2 + 0.0000764 * U3 - Input - ----- - x : DataArray of taux or taux**2 - y : DataArray of tauy or tauy**2 + .. note:: + This is useful for looking at wind speed on the native ocean grid, rather than + trying to interpolate between atmospheric and oceanic grids. + + This is based on the conversion used in Lovenduski et al. 
(2007), which is related + to the CESM coupler conversion: + http://www.cesm.ucar.edu/models/ccsm3.0/cpl6/users_guide/node20.html - This script expects that tau is in dyn/cm2. + Args: + x (xr.DataArray): ``TAUX`` or ``TAUX2``. + y (xr.DataArray): ``TAUY`` or ``TAUY2``. - Return - ------ - U10 : DataArray of the approximated wind speed. + Returns: + U10 (xr.DataArray): Approximated U10 wind speed. """ tau = ( (np.sqrt(x ** 2 + y ** 2)) / 1.2 * 100 ** 2 / 1e5 @@ -42,5 +36,5 @@ def stress_to_speed(x, y): i = np.imag(r) good = np.where(i == 0) U10[t] = np.real(r[good]) - U10 = xr.DataArray(U10, dims=["time"], coords=[tau["time"]]) + U10 = xr.DataArray(U10, dims=['time'], coords=[tau['time']]) return U10 diff --git a/esmtools/spatial.py b/esmtools/spatial.py index 9965524..faf596d 100644 --- a/esmtools/spatial.py +++ b/esmtools/spatial.py @@ -3,6 +3,45 @@ from .checks import is_xarray +@is_xarray(0) +def extract_region(ds, xgrid, ygrid, coords, lat_dim='lat', lon_dim='lon'): + """Extract a subset of some larger spatial data. + + Args: + ds (xarray object): Data to be subset. + xgrid (array_like): Meshgrid of longitudes. + ygrid (array_like): Meshgrid of latitudes. + coords (1-D array or list): [x0, x1, y0, y1] pertaining to the corners of the + box to extract. + lat_dim (optional str): Latitude dimension name (default 'lat'). + lon_dim (optional str): Longitude dimension name (default 'lon') + + Returns: + subset_data (xarray object): Data subset to domain of interest. + + Examples: + >>> import esmtools as et + >>> import numpy as np + >>> import xarray as xr + >>> x = np.linspace(0, 360, 37) + >>> y = np.linspace(-90, 90, 19) + >>> xx, yy = np.meshgrid(x, y) + >>> ds = xr.DataArray(np.random.rand(19, 37), dims=['lat', 'lon']) + >>> ds['latitude'] = (('lat', 'lon'), yy) + >>> ds['longitude'] = (('lat', 'lon'), xx) + >>> coords = [0, 30, -20, 20] + >>> subset = et.spatial.extract_region(ds, xx, yy, coords) + """ + # Extract the corners of the box. + x0, x1, y0, y1 = coords + # Find indices on meshgrid for the box corners. + a, c = find_indices(xgrid, ygrid, x0, y0) + b, d = find_indices(xgrid, ygrid, x1, y1) + # Slice is not inclusive, so need to add one to end. + subset_data = ds.isel({lat_dim: slice(a, b + 1), lon_dim: slice(c, d + 1)}) + return subset_data + + def find_indices(xgrid, ygrid, xpoint, ypoint): """Returns the i, j index for a latitude/longitude point on a grid. @@ -42,42 +81,3 @@ def find_indices(xgrid, ygrid, xpoint, ypoint): min_ix = np.nanargmin(reduced_grid) i, j = np.unravel_index(min_ix, reduced_grid.shape) return i, j - - -@is_xarray(0) -def extract_region(ds, xgrid, ygrid, coords, lat_dim="lat", lon_dim="lon"): - """Extract a subset of some larger spatial data. - - Args: - ds (xarray object): Data to be subset. - xgrid (array_like): Meshgrid of longitudes. - ygrid (array_like): Meshgrid of latitudes. - coords (1-D array or list): [x0, x1, y0, y1] pertaining to the corners of the - box to extract. - lat_dim (optional str): Latitude dimension name (default 'lat'). - lon_dim (optional str): Longitude dimension name (default 'lon') - - Returns: - subset_data (xarray object): Data subset to domain of interest. 
- - Examples: - >>> import esmtools as et - >>> import numpy as np - >>> import xarray as xr - >>> x = np.linspace(0, 360, 37) - >>> y = np.linspace(-90, 90, 19) - >>> xx, yy = np.meshgrid(x, y) - >>> ds = xr.DataArray(np.random.rand(19, 37), dims=['lat', 'lon']) - >>> ds['latitude'] = (('lat', 'lon'), yy) - >>> ds['longitude'] = (('lat', 'lon'), xx) - >>> coords = [0, 30, -20, 20] - >>> subset = et.spatial.extract_region(ds, xx, yy, coords) - """ - # Extract the corners of the box. - x0, x1, y0, y1 = coords - # Find indices on meshgrid for the box corners. - a, c = find_indices(xgrid, ygrid, x0, y0) - b, d = find_indices(xgrid, ygrid, x1, y1) - # Slice is not inclusive, so need to add one to end. - subset_data = ds.isel({lat_dim: slice(a, b + 1), lon_dim: slice(c, d + 1)}) - return subset_data diff --git a/esmtools/stats.py b/esmtools/stats.py index a40bff8..aa10a74 100644 --- a/esmtools/stats.py +++ b/esmtools/stats.py @@ -8,128 +8,7 @@ from .checks import has_dims, has_missing, is_xarray from .timeutils import TimeUtilAccessor -from .utils import get_dims, match_nans - - -@is_xarray(0) -def standardize(ds, dim="time"): - """Standardize Dataset/DataArray - - .. math:: - \\frac{x - \\mu_{x}}{\\sigma_{x}} - - Args: - ds (xarray object): Dataset or DataArray with variable(s) to standardize. - dim (optional str): Which dimension to standardize over (default 'time'). - - Returns: - stdized (xarray object): Standardized variable(s). - """ - stdized = (ds - ds.mean(dim)) / ds.std(dim) - return stdized - - -@is_xarray(0) -def nanmean(ds, dim="time"): - """Compute mean NaNs and suppress warning from numpy""" - if "time" in ds.dims: - mask = ds.isnull().isel(time=0) - else: - mask = ds.isnull() - ds = ds.fillna(0).mean(dim) - ds = ds.where(~mask) - return ds - - -@is_xarray(0) -def cos_weight(da, lat_coord="lat", lon_coord="lon", one_dimensional=True): - """ - Area-weights data on a regular (e.g. 360x180) grid that does not come with - cell areas. Uses cosine-weighting. - - Parameters - ---------- - da : DataArray with longitude and latitude - lat_coord : str (optional) - Name of latitude coordinate - lon_coord : str (optional) - Name of longitude coordinate - one_dimensional : bool (optional) - If true, assumes that lat and lon are 1D (i.e. not a meshgrid) - - Returns - ------- - aw_da : Area-weighted DataArray - - Examples - -------- - import esmtools as et - da_aw = et.stats.reg_aw(SST) - """ - non_spatial = [i for i in get_dims(da) if i not in [lat_coord, lon_coord]] - filter_dict = {} - while len(non_spatial) > 0: - filter_dict.update({non_spatial[0]: 0}) - non_spatial.pop(0) - if one_dimensional: - lon, lat = np.meshgrid(da[lon_coord], da[lat_coord]) - else: - lat = da[lat_coord] - # NaN out land to not go into area-weighting - lat = lat.astype("float") - nan_mask = np.asarray(da.isel(filter_dict).isnull()) - lat[nan_mask] = np.nan - cos_lat = np.cos(np.deg2rad(lat)) - aw_da = (da * cos_lat).sum(lat_coord).sum(lon_coord) / np.nansum(cos_lat) - return aw_da - - -@is_xarray(0) -def area_weight(da, area_coord="area"): - """ - Returns an area-weighted time series from the input xarray dataarray. This - automatically figures out spatial dimensions vs. other dimensions. I.e., - this function works for just a single realization or for many realizations. - See `reg_aw` if you have a regular (e.g. 360x180) grid that does not - contain cell areas. - - It also looks like xarray is implementing a feature like this. 
- - NOTE: This currently does not support datasets (of multiple variables) - The user can alleviate this by using the .apply() function. - - Parameters - ---------- - da : DataArray - area_coord : str (defaults to 'area') - Name of area coordinate if different from 'area' - - Returns - ------- - aw_da : Area-weighted DataArray - """ - area = da[area_coord] - # Mask the area coordinate in case you've got a bunch of NaNs, e.g. a mask - # or land. - dimlist = get_dims(da) - # Pull out coordinates that aren't spatial. Time, ensemble members, etc. - non_spatial = [i for i in dimlist if i not in get_dims(area)] - filter_dict = {} - while len(non_spatial) > 0: - filter_dict.update({non_spatial[0]: 0}) - non_spatial.pop(0) - masked_area = area.where(da.isel(filter_dict).notnull()) - # Compute area-weighting. - dimlist = get_dims(masked_area) - aw_da = da * masked_area - # Sum over arbitrary number of dimensions. - while len(dimlist) > 0: - print(f"Summing over {dimlist[0]}") - aw_da = aw_da.sum(dimlist[0]) - dimlist.pop(0) - # Finish area-weighting by dividing by sum of area coordinate. - aw_da = aw_da / masked_area.sum() - return aw_da +from .utils import match_nans def _check_y_not_independent_variable(y, dim): @@ -146,8 +25,8 @@ def _check_y_not_independent_variable(y, dim): """ if isinstance(y, xr.DataArray) and (y.name == dim): raise ValueError( - f"Dependent variable y should not be the same as the dim {dim} being " - "applied over. Change your y variable to x." + f'Dependent variable y should not be the same as the dim {dim} being ' + 'applied over. Change your y variable to x.' ) @@ -203,12 +82,12 @@ def _handle_nans(x, y, nan_policy): # Only support 1D, since we are doing `~np.isnan()` indexing for 'omit'/'drop'. if (x.ndim > 1) or (y.ndim > 1): raise ValueError( - f"x and y must be 1-dimensional. Got {x.ndim} for x and {y.ndim} for y." + f'x and y must be 1-dimensional. Got {x.ndim} for x and {y.ndim} for y.' ) - if nan_policy in ["none", "propagate"]: + if nan_policy in ['none', 'propagate']: return x, y - elif nan_policy == "raise": + elif nan_policy == 'raise': if has_missing(x) or has_missing(y): raise ValueError( "Input data contains NaNs. Consider changing `nan_policy` to 'none' " @@ -216,7 +95,7 @@ def _handle_nans(x, y, nan_policy): ) else: return x, y - elif nan_policy in ["omit", "drop"]: + elif nan_policy in ['omit', 'drop']: if has_missing(x) or has_missing(y): x_mod, y_mod = match_nans(x, y) # The above function pairwise-matches nans. Now we remove them so that we @@ -256,11 +135,11 @@ def _polyfit(x, y, order, nan_policy): """ x_mod, y_mod = _handle_nans(x, y, nan_policy) # This catches cases where a given grid cell is full of nans, like in land masking. - if (nan_policy in ["omit", "drop"]) and (x_mod.size == 0): + if (nan_policy in ['omit', 'drop']) and (x_mod.size == 0): return np.full(len(x), np.nan) # This catches cases where there is missing values in the independent axis, which # breaks polyfit. - elif (nan_policy in ["none", "propagate"]) and (has_missing(x_mod)): + elif (nan_policy in ['none', 'propagate']) and (has_missing(x_mod)): return np.full(len(x), np.nan) else: # fit to data without nans, return applied to original independent axis. @@ -279,14 +158,115 @@ def _warn_if_not_converted_to_original_time_units(x): if x.timeutils.is_temporal: if x.timeutils.freq is None: warnings.warn( - "Datetime frequency not detected. Slope and std. errors will be " - "in original units per day (e.g., degC per day). 
Multiply by " - "e.g., 365.25 to convert to original units per year." + 'Datetime frequency not detected. Slope and std. errors will be ' + 'in original units per day (e.g., degC per day). Multiply by ' + 'e.g., 365.25 to convert to original units per year.' ) +@is_xarray(0) +def ACF(ds, dim='time', nlags=None): + """Compute the ACF of a time series to a specific lag. + + Args: + ds (xarray object): dataset/dataarray containing the time series. + dim (str): dimension to apply ACF over. + nlags (optional int): number of lags to compute ACF over. If None, + compute for length of `dim` on `ds`. + + Returns: + Dataset or DataArray with ACF results. + + Notes: + This is preferred over ACF functions from MATLAB/scipy, since it doesn't + use FFT methods. + """ + # Drop variables that don't have requested dimension, so this can be + # applied over the full dataset. + if isinstance(ds, xr.Dataset): + dropVars = [i for i in ds if dim not in ds[i].dims] + ds = ds.drop(dropVars) + + # Loop through every step in `dim` + if nlags is None: + nlags = ds[dim].size + + acf = [] + # The 2 factor accounts for fact that time series reduces in size for + # each lag. + for i in range(nlags - 2): + res = autocorr(ds, lag=i, dim=dim) + acf.append(res) + acf = xr.concat(acf, dim=dim) + return acf + + +@is_xarray(0) +def autocorr(ds, lag=1, dim='time', return_p=False): + """Calculated lagged correlation of a xr.Dataset. + + Args: + ds (xarray object): Dataset to compute autocorrelation with. + lag (int, optional): Lag to compute autocorrelation at. Defaults to 1. + dim (str, optional): Dimension to compute autocorrelation over. + Default to 'time'. + return_p (bool, optional): If True, return just the correlation coefficient. + If False, return both the correlation coefficient and p value. + + Returns: + r (xarray object): Pearson correlation coefficient. + p (xarray object): P value, if ``return_p`` is True. + """ + return st.autocorr(ds, lag=lag, dim=dim, return_p=return_p) + + +@is_xarray(0) +def corr(x, y, dim='time', lead=0, return_p=False): + """Computes the Pearson product-moment coefficient of linear correlation. + + This version calculates the effective degrees of freedom, accounting + for autocorrelation within each time series that could fluff the + significance of the correlation. + + Args: + x, y (xarray.DataArray): Time series being correlated. + dim (str, optional): Dimension to calculate correlation over. Defaults to + 'time'. + lead (int, optional): If lead > 0, ``x`` leads ``y`` by that many time steps. + If lead < 0, ``x`` lags ``y`` by that many time steps. Defaults to 0. + return_p (bool, optional). If True, return both ``r`` and ``p``. Otherwise, + just return ``r``. Defaults to False. + + Returns: + r (xarray object): Pearson correlation coefficient. + p (xarray object): P value, if ``return_p`` is True. + + References: + * Wilks, Daniel S. Statistical methods in the atmospheric sciences. + Vol. 100. Academic press, 2011. + * Lovenduski, Nicole S., and Nicolas Gruber. "Impact of the Southern + Annular Mode on Southern Ocean circulation and biology." Geophysical + Research Letters 32.11 (2005). + * Brady, R. X., Lovenduski, N. S., Alexander, M. A., Jacox, M., and + Gruber, N.: On the role of climate modes in modulating the air-sea CO2 + fluxes in Eastern Boundary Upwelling Systems, Biogeosciences Discuss., + https://doi.org/10.5194/bg-2018-415, 2019. + + """ + # Broadcasts a time series to the same coordinates/size as the grid. 
If they + # are both grids, this function does nothing and isn't expensive. + x, y = xr.broadcast(x, y) + + # Negative lead should have y lead x. + if lead < 0: + lead = np.abs(lead) + return st.corr(y, x, dim=dim, lag=lead, return_p=return_p) + else: + return st.corr(x, y, dim=dim, lag=lead, return_p=return_p) + + @is_xarray([0, 1]) -def linear_slope(x, y, dim="time", nan_policy="none"): +def linear_slope(x, y, dim='time', nan_policy='none'): """Returns the linear slope with y regressed onto x. .. note:: @@ -314,8 +294,8 @@ def linear_slope(x, y, dim="time", nan_policy="none"): Returns: xarray object: Slopes computed through a least-squares linear regression. """ - has_dims(x, dim, "predictor (x)") - has_dims(y, dim, "predictand (y)") + has_dims(x, dim, 'predictor (x)') + has_dims(y, dim, 'predictand (y)') _check_y_not_independent_variable(y, dim) x, slope_factor = _convert_time_and_return_slope_factor(x, dim) @@ -323,11 +303,11 @@ def _linear_slope(x, y, nan_policy): x, y = _handle_nans(x, y, nan_policy) # This catches cases where a given grid cell is full of nans, like in # land masking. - if (nan_policy in ["omit", "drop"]) and (x.size == 0): + if (nan_policy in ['omit', 'drop']) and (x.size == 0): return np.asarray([np.nan]) # This catches cases where there is missing values in the independent axis, # which breaks polyfit. - elif (nan_policy in ["none", "propagate"]) and (has_missing(x)): + elif (nan_policy in ['none', 'propagate']) and (has_missing(x)): return np.asarray([np.nan]) else: return np.polyfit(x, y, 1)[0] @@ -338,16 +318,16 @@ def _linear_slope(x, y, nan_policy): y, nan_policy, vectorize=True, - dask="parallelized", + dask='parallelized', input_core_dims=[[dim], [dim], []], - output_dtypes=["float64"], + output_dtypes=['float64'], ) _warn_if_not_converted_to_original_time_units(x) return slopes * slope_factor @is_xarray([0, 1]) -def linregress(x, y, dim="time", nan_policy="none"): +def linregress(x, y, dim='time', nan_policy='none'): """Vectorized applciation of ``scipy.stats.linregress``. .. note:: @@ -378,8 +358,8 @@ def linregress(x, y, dim="time", nan_policy="none"): "parameter". """ - has_dims(x, dim, "predictor (x)") - has_dims(y, dim, "predictand (y)") + has_dims(x, dim, 'predictor (x)') + has_dims(y, dim, 'predictand (y)') _check_y_not_independent_variable(y, dim) x, slope_factor = _convert_time_and_return_slope_factor(x, dim) @@ -387,7 +367,7 @@ def _linregress(x, y, slope_factor, nan_policy): x, y = _handle_nans(x, y, nan_policy) # This catches cases where a given grid cell is full of nans, like in # land masking. 
- if (nan_policy in ["omit", "drop"]) and (x.size == 0): + if (nan_policy in ['omit', 'drop']) and (x.size == 0): return np.full(5, np.nan) else: m, b, r, p, e = scipy.stats.linregress(x, y) @@ -404,21 +384,21 @@ def _linregress(x, y, slope_factor, nan_policy): slope_factor, nan_policy, vectorize=True, - dask="parallelized", + dask='parallelized', input_core_dims=[[dim], [dim], [], []], - output_core_dims=[["parameter"]], - output_dtypes=["float64"], - output_sizes={"parameter": 5}, + output_core_dims=[['parameter']], + output_dtypes=['float64'], + output_sizes={'parameter': 5}, ) results = results.assign_coords( - parameter=["slope", "intercept", "rvalue", "pvalue", "stderr"] + parameter=['slope', 'intercept', 'rvalue', 'pvalue', 'stderr'] ) _warn_if_not_converted_to_original_time_units(x) return results @is_xarray(0) -def polyfit(x, y, order, dim="time", nan_policy="none"): +def polyfit(x, y, order, dim='time', nan_policy='none'): """Returns the fitted polynomial line of ``y`` regressed onto ``x``. .. note:: @@ -444,8 +424,8 @@ def polyfit(x, y, order, dim="time", nan_policy="none"): xarray object: The polynomial fit for ``y`` regressed onto ``x``. Has the same dimensions as ``y``. """ - has_dims(x, dim, "predictor (x)") - has_dims(y, dim, "predictand (y)") + has_dims(x, dim, 'predictor (x)') + has_dims(y, dim, 'predictand (y)') _check_y_not_independent_variable(y, dim) x, _ = _convert_time_and_return_slope_factor(x, dim) @@ -456,15 +436,15 @@ def polyfit(x, y, order, dim="time", nan_policy="none"): order, nan_policy, vectorize=True, - dask="parallelized", + dask='parallelized', input_core_dims=[[dim], [dim], [], []], output_core_dims=[[dim]], - output_dtypes=["float"], + output_dtypes=['float'], ) @is_xarray(0) -def rm_poly(x, y, order, dim="time", nan_policy="none"): +def rm_poly(x, y, order, dim='time', nan_policy='none'): """Removes a polynomial fit from ``y`` regressed onto ``x``. Args: @@ -485,8 +465,8 @@ def rm_poly(x, y, order, dim="time", nan_policy="none"): Returns: xarray object: ``y`` with polynomial fit of order ``order`` removed. """ - has_dims(x, dim, "predictor (x)") - has_dims(y, dim, "predictand (y)") + has_dims(x, dim, 'predictor (x)') + has_dims(y, dim, 'predictand (y)') _check_y_not_independent_variable(y, dim) x, _ = _convert_time_and_return_slope_factor(x, dim) @@ -501,15 +481,15 @@ def _rm_poly(x, y, order, nan_policy): order, nan_policy, vectorize=True, - dask="parallelized", + dask='parallelized', input_core_dims=[[dim], [dim], [], []], output_core_dims=[[dim]], - output_dtypes=["float64"], + output_dtypes=['float64'], ) @is_xarray(0) -def rm_trend(x, y, dim="time", nan_policy="none"): +def rm_trend(x, y, dim='time', nan_policy='none'): """Removes a linear fit from ``y`` regressed onto ``x``. Args: @@ -533,110 +513,18 @@ def rm_trend(x, y, dim="time", nan_policy="none"): @is_xarray(0) -def autocorr(ds, lag=1, dim="time", return_p=False): - """ - Calculated lagged correlation of a xr.Dataset. - Parameters - ---------- - ds : xarray dataset/dataarray - lag : int (default 1) - number of time steps to lag correlate. - dim : str (default 'time') - name of time dimension/dimension to autocorrelate over - return_p : boolean (default False) - if false, return just the correlation coefficient. - if true, return both the correlation coefficient and p-value. 
- Returns - ------- - r : Pearson correlation coefficient - p : (if return_p True) p-value - """ - return st.autocorr(ds, lag=lag, dim=dim, return_p=return_p) - +def standardize(ds, dim='time'): + """Standardize Dataset/DataArray -@is_xarray(0) -def ACF(ds, dim="time", nlags=None): - """ - Compute the ACF of a time series to a specific lag. + .. math:: + \\frac{x - \\mu_{x}}{\\sigma_{x}} Args: - ds (xarray object): dataset/dataarray containing the time series. - dim (str): dimension to apply ACF over. - nlags (optional int): number of lags to compute ACF over. If None, - compute for length of `dim` on `ds`. + ds (xarray object): Dataset or DataArray with variable(s) to standardize. + dim (optional str): Which dimension to standardize over (default 'time'). Returns: - Dataset or DataArray with ACF results. - - Notes: - This is preferred over ACF functions from MATLAB/scipy, since it doesn't - use FFT methods. - """ - # Drop variables that don't have requested dimension, so this can be - # applied over the full dataset. - if isinstance(ds, xr.Dataset): - dropVars = [i for i in ds if dim not in ds[i].dims] - ds = ds.drop(dropVars) - - # Loop through every step in `dim` - if nlags is None: - nlags = ds[dim].size - - acf = [] - # The 2 factor accounts for fact that time series reduces in size for - # each lag. - for i in range(nlags - 2): - res = autocorr(ds, lag=i, dim=dim) - acf.append(res) - acf = xr.concat(acf, dim=dim) - return acf - - -@is_xarray(0) -def corr(x, y, dim="time", lead=0, return_p=False): - """ - Computes the Pearson product-momment coefficient of linear correlation. - - This version calculates the effective degrees of freedom, accounting - for autocorrelation within each time series that could fluff the - significance of the correlation. - - Parameters - ---------- - x, y : xarray DataArray - time series being correlated (can be multi-dimensional) - dim : str (default 'time') - Correlation dimension - lead : int (default 0) - If lead > 0, x leads y by that many time steps. - If lead < 0, x lags y by that many time steps. - return_p : boolean (default False) - If true, return both r and p - - Returns - ------- - r : correlation coefficient - p : p-value accounting for autocorrelation (if return_p True) - - References (for dealing with autocorrelation): - ---------- - 1. Wilks, Daniel S. Statistical methods in the atmospheric sciences. - Vol. 100. Academic press, 2011. - 2. Lovenduski, Nicole S., and Nicolas Gruber. "Impact of the Southern - Annular Mode on Southern Ocean circulation and biology." Geophysical - Research Letters 32.11 (2005). - 3. Brady, R. X., Lovenduski, N. S., Alexander, M. A., Jacox, M., and - Gruber, N.: On the role of climate modes in modulating the air-sea CO2 - fluxes in Eastern Boundary Upwelling Systems, Biogeosciences Discuss., - https://doi.org/10.5194/bg-2018-415, in review, 2018. + stdized (xarray object): Standardized variable(s). """ - # Broadcasts a time series to the same coordinates/size as the grid. If they - # are both grids, this function does nothing and isn't expensive. - x, y = xr.broadcast(x, y) - - # Negative lead should have y lead x. 
- if lead < 0: - lead = np.abs(lead) - return st.corr(y, x, dim=dim, lag=lead, return_p=return_p) - else: - return st.corr(x, y, dim=dim, lag=lead, return_p=return_p) + stdized = (ds - ds.mean(dim)) / ds.std(dim) + return stdized diff --git a/esmtools/testing.py b/esmtools/testing.py index 8e3cb13..8cae7e3 100644 --- a/esmtools/testing.py +++ b/esmtools/testing.py @@ -6,24 +6,7 @@ from .checks import is_xarray from .constants import MULTIPLE_TESTS -__all__ = ["ttest_ind_from_stats", "multipletests"] - - -def ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2): - """Parallelize scipy.stats.ttest_ind_from_stats.""" - return xr.apply_ufunc( - tti_from_stats, - mean1, - std1, - nobs1, - mean2, - std2, - nobs2, - input_core_dims=[[], [], [], [], [], []], - output_core_dims=[[], []], - vectorize=True, - dask="parallelized", - ) +__all__ = ['ttest_ind_from_stats', 'multipletests'] @is_xarray(0) @@ -60,7 +43,7 @@ def multipletests(p, alpha=0.05, method=None, **multipletests_kwargs): if method is None: raise ValueError( f"Please indicate a method using the 'method=...' keyword. " - f"Select from {MULTIPLE_TESTS}" + f'Select from {MULTIPLE_TESTS}' ) elif method not in MULTIPLE_TESTS: raise ValueError( @@ -81,4 +64,35 @@ def multipletests(p, alpha=0.05, method=None, **multipletests_kwargs): p_stacked[mask], alpha=alpha, method=method, **multipletests_kwargs ) - return reject.unstack("s"), pvals_corrected.unstack("s") + return reject.unstack('s'), pvals_corrected.unstack('s') + + +def ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var=True): + """Parallelize scipy.stats.ttest_ind_from_stats and make dask-compatible. + + Args: + mean1, mean2 (array_like): The means of samples 1 and 2. + std1, std2 (array_like): The standard deviations of samples 1 and 2. + nobs1, nobs2 (array_like): The number of observations for samples 1 and 2. + equal_var (bool, optional): If True (default), perform a standard independent + 2 sample test that assumes equal population variances. If False, perform + Welch's t-test, which does not assume equal population variance. + + Returns: + statistic (float or array): The calculated t-statistics. + pvalue (float or array): The two-tailed p-value. + """ + return xr.apply_ufunc( + tti_from_stats, + mean1, + std1, + nobs1, + mean2, + std2, + nobs2, + equal_var, + input_core_dims=[[], [], [], [], [], [], []], + output_core_dims=[[], []], + vectorize=True, + dask='parallelized', + ) diff --git a/esmtools/timeutils.py b/esmtools/timeutils.py index 7320d4f..c5106e1 100644 --- a/esmtools/timeutils.py +++ b/esmtools/timeutils.py @@ -7,8 +7,11 @@ from .constants import DAYS_PER_MONTH, DAYS_PER_YEAR -@xr.register_dataarray_accessor("timeutils") +@xr.register_dataarray_accessor('timeutils') class TimeUtilAccessor: + """Accessor for cftime, datetime, and timeoffset indexed DataArrays. 
This aids + in modifying time axes for slope correction and for converting to numeric time.""" + def __init__(self, xarray_obj): self._obj = xarray_obj self._is_temporal = contains_datetime_like_objects(self._obj) @@ -57,61 +60,61 @@ def slope_factor(self): def return_numeric_time(self): """Returns numeric time.""" if self.is_datetime_like: - x = (self._obj - np.datetime64("1990-01-01")) / np.timedelta64(1, "D") + x = (self._obj - np.datetime64('1990-01-01')) / np.timedelta64(1, 'D') return x elif self.is_cftime_like: x = cftime.date2num( - self._obj, "days since 1990-01-01", calendar=self.calendar + self._obj, 'days since 1990-01-01', calendar=self.calendar ) x = xr.DataArray(x, dims=self._obj.dims, coords=self._obj.coords) return x else: - raise ValueError("DataArray is not a time array of datetimes or cftimes.") + raise ValueError('DataArray is not a time array of datetimes or cftimes.') @staticmethod def construct_quarterly_aliases(): - quarters = ["Q", "BQ", "QS", "BQS"] + quarters = ['Q', 'BQ', 'QS', 'BQS'] for month in [ - "JAN", - "FEB", - "MAR", - "APR", - "MAY", - "JUN", - "JUL", - "AUG", - "SEP", - "OCT", - "NOV", - "DEC", + 'JAN', + 'FEB', + 'MAR', + 'APR', + 'MAY', + 'JUN', + 'JUL', + 'AUG', + 'SEP', + 'OCT', + 'NOV', + 'DEC', ]: - quarters.append(f"Q-{month}") - quarters.append(f"BQ-{month}") - quarters.append(f"BQS-{month}") - quarters.append(f"QS-{month}") + quarters.append(f'Q-{month}') + quarters.append(f'BQ-{month}') + quarters.append(f'BQS-{month}') + quarters.append(f'QS-{month}') return quarters @staticmethod def construct_annual_aliases(): - years = ["A", "Y", "BA", "BY", "AS", "YS", "BAS", "BYS", "Q"] + years = ['A', 'Y', 'BA', 'BY', 'AS', 'YS', 'BAS', 'BYS', 'Q'] for month in [ - "JAN", - "FEB", - "MAR", - "APR", - "MAY", - "JUN", - "JUL", - "AUG", - "SEP", - "OCT", - "NOV", - "DEC", + 'JAN', + 'FEB', + 'MAR', + 'APR', + 'MAY', + 'JUN', + 'JUL', + 'AUG', + 'SEP', + 'OCT', + 'NOV', + 'DEC', ]: - years.append(f"A-{month}") - years.append(f"BA-{month}") - years.append(f"BAS-{month}") - years.append(f"AS-{month}") + years.append(f'A-{month}') + years.append(f'BA-{month}') + years.append(f'BAS-{month}') + years.append(f'AS-{month}') return years def construct_slope_factors(self): @@ -121,21 +124,21 @@ def construct_slope_factors(self): quarters = self.construct_quarterly_aliases() quarters = {k: self.annual_factor / 4 for k in quarters} - months = ("M", "BM", "CBM", "MS", "BMS", "CBMS") + months = ('M', 'BM', 'CBM', 'MS', 'BMS', 'CBMS') months = {k: self.annual_factor / 12 for k in months} - semimonths = {k: 15 for k in ("SM", "SMS")} + semimonths = {k: 15 for k in ('SM', 'SMS')} - weeks = ("W", "W-SUN", "W-MON", "W-TUE", "W-WED", "W-THU", "W-FRI", "W-SAT") + weeks = ('W', 'W-SUN', 'W-MON', 'W-TUE', 'W-WED', 'W-THU', 'W-FRI', 'W-SAT') weeks = {k: 7 for k in weeks} - days = {k: 1 for k in ("B", "C", "D")} - hours = {k: 1 / 24 for k in ("BH", "H")} - mins = {k: 1 / (24 * 60) for k in ("T", "min")} - secs = {k: 1 / (24 * 60 * 60) for k in ("S")} - millisecs = {k: 1 / (24 * 60 * 60 * 1e3) for k in ("ms", "L")} - microsecs = {k: 1 / (24 * 60 * 60 * 1e6) for k in ("U", "us")} - nanosecs = {k: 1 / (24 * 60 * 60 * 1e9) for k in ("N")} + days = {k: 1 for k in ('B', 'C', 'D')} + hours = {k: 1 / 24 for k in ('BH', 'H')} + mins = {k: 1 / (24 * 60) for k in ('T', 'min')} + secs = {k: 1 / (24 * 60 * 60) for k in ('S')} + millisecs = {k: 1 / (24 * 60 * 60 * 1e3) for k in ('ms', 'L')} + microsecs = {k: 1 / (24 * 60 * 60 * 1e6) for k in ('U', 'us')} + nanosecs = {k: 1 / (24 * 60 * 60 * 
1e9) for k in ('N')} DATETIME_FACTOR = {} for d in ( @@ -178,13 +181,13 @@ def get_calendar(dates): Raises: ValueError: If inferred calendar is not in our list of supported calendars. """ - if np.asarray(dates).dtype == "datetime64[ns]": - return "proleptic_gregorian" + if np.asarray(dates).dtype == 'datetime64[ns]': + return 'proleptic_gregorian' else: return np.asarray(dates).ravel()[0].calendar -def get_days_per_month(time, calendar="standard"): +def get_days_per_month(time, calendar='standard'): """Return an array of days per month corresponding to a given calendar. Args: @@ -197,7 +200,7 @@ def get_days_per_month(time, calendar="standard"): Raises: ValueError: If input time index is not a CFTimeIndex or DatetimeIndex. """ - is_time_index(time, "time index") + is_time_index(time, 'time index') month_length = np.zeros(len(time), dtype=np.int) cal_days = DAYS_PER_MONTH[calendar] @@ -217,15 +220,15 @@ def is_time_index(xobj, kind): i.e. through .to_index() """ xtype = type(xobj).__name__ - if xtype not in ["CFTimeIndex", "DatetimeIndex"]: + if xtype not in ['CFTimeIndex', 'DatetimeIndex']: raise ValueError( - f"Your {kind} object must be either an xr.CFTimeIndex or " - f"pd.DatetimeIndex." + f'Your {kind} object must be either an xr.CFTimeIndex or ' + f'pd.DatetimeIndex.' ) return True -def leap_year(year, calendar="standard"): +def leap_year(year, calendar='standard'): """Determine if year is a leap year. Args: @@ -236,18 +239,18 @@ def leap_year(year, calendar="standard"): bool: True if year is a leap year. """ leap = False - if (calendar in ["standard", "gregorian", "proleptic_gregorian", "julian"]) and ( + if (calendar in ['standard', 'gregorian', 'proleptic_gregorian', 'julian']) and ( year % 4 == 0 ): leap = True if ( - (calendar == "proleptic_gregorian") + (calendar == 'proleptic_gregorian') and (year % 100 == 0) and (year % 400 != 0) ): leap = False elif ( - (calendar in ["standard", "gregorian"]) + (calendar in ['standard', 'gregorian']) and (year % 100 == 0) and (year % 400 != 0) and (year < 1583) @@ -264,9 +267,9 @@ def infer_freq(index): if isinstance(index, (DataArray, pd.Series)): dtype = np.asarray(index).dtype - if dtype == "datetime64[ns]": + if dtype == 'datetime64[ns]': index = pd.DatetimeIndex(index.values) - elif dtype == "timedelta64[ns]": + elif dtype == 'timedelta64[ns]': index = pd.TimedeltaIndex(index.values) else: index = xr.CFTimeIndex(index.values) @@ -285,18 +288,18 @@ def infer_freq(index): _ONE_HOUR = 60 * _ONE_MINUTE _ONE_DAY = 24 * _ONE_HOUR _MONTH_ABBREVIATIONS = { - 1: "JAN", - 2: "FEB", - 3: "MAR", - 4: "APR", - 5: "MAY", - 6: "JUN", - 7: "JUL", - 8: "AUG", - 9: "SEP", - 10: "OCT", - 11: "NOV", - 12: "DEC", + 1: 'JAN', + 2: 'FEB', + 3: 'MAR', + 4: 'APR', + 5: 'MAY', + 6: 'JUN', + 7: 'JUL', + 8: 'AUG', + 9: 'SEP', + 10: 'OCT', + 11: 'NOV', + 12: 'DEC', } @@ -306,7 +309,7 @@ def __init__(self, index): self.values = index.asi8 if len(index) < 3: - raise ValueError("Need at least 3 dates to infer frequency") + raise ValueError('Need at least 3 dates to infer frequency') self.is_monotonic = ( self.index.is_monotonic_decreasing or self.index.is_monotonic_increasing @@ -330,22 +333,22 @@ def get_freq(self): return None if _is_multiple(delta, _ONE_HOUR): - return _maybe_add_count("H", delta / _ONE_HOUR) + return _maybe_add_count('H', delta / _ONE_HOUR) elif _is_multiple(delta, _ONE_MINUTE): - return _maybe_add_count("T", delta / _ONE_MINUTE) + return _maybe_add_count('T', delta / _ONE_MINUTE) elif _is_multiple(delta, _ONE_SECOND): - return 
_maybe_add_count("S", delta / _ONE_SECOND) + return _maybe_add_count('S', delta / _ONE_SECOND) elif _is_multiple(delta, _ONE_MILLI): - return _maybe_add_count("L", delta / _ONE_MILLI) + return _maybe_add_count('L', delta / _ONE_MILLI) else: - return _maybe_add_count("U", delta / _ONE_MICRO) + return _maybe_add_count('U', delta / _ONE_MICRO) def _infer_daily_rule(self): annual_rule = self._get_annual_rule() if annual_rule: nyears = self.year_deltas[0] month = _MONTH_ABBREVIATIONS[self.index[0].month] - alias = f"{annual_rule}-{month}" + alias = f'{annual_rule}-{month}' return _maybe_add_count(alias, nyears) quartely_rule = self._get_quartely_rule() @@ -353,7 +356,7 @@ def _infer_daily_rule(self): nquarters = self.month_deltas[0] / 3 mod_dict = {0: 12, 2: 11, 1: 10} month = _MONTH_ABBREVIATIONS[mod_dict[self.index[0].month % 3]] - alias = f"{quartely_rule}-{month}" + alias = f'{quartely_rule}-{month}' return _maybe_add_count(alias, nquarters) monthly_rule = self._get_monthly_rule() @@ -363,7 +366,7 @@ def _infer_daily_rule(self): if len(self.deltas) == 1: # Daily as there is no "Weekly" offsets with CFTime days = self.deltas[0] / _ONE_DAY - return _maybe_add_count("D", days) + return _maybe_add_count('D', days) # CFTime has no business freq and no "week of month" (WOM) return None @@ -375,7 +378,7 @@ def _get_annual_rule(self): if len(np.unique(self.index.month)) > 1: return None - return {"cs": "AS", "ce": "A"}.get(month_anchor_check(self.index)) + return {'cs': 'AS', 'ce': 'A'}.get(month_anchor_check(self.index)) def _get_quartely_rule(self): if len(self.month_deltas) > 1: @@ -384,13 +387,13 @@ def _get_quartely_rule(self): if not self.month_deltas[0] % 3 == 0: return None - return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index)) + return {'cs': 'QS', 'ce': 'Q'}.get(month_anchor_check(self.index)) def _get_monthly_rule(self): if len(self.month_deltas) > 1: return None - return {"cs": "MS", "ce": "M"}.get(month_anchor_check(self.index)) + return {'cs': 'MS', 'ce': 'M'}.get(month_anchor_check(self.index)) @property def deltas(self): @@ -424,7 +427,7 @@ def _maybe_add_count(base: str, count: float): if count != 1: assert count == int(count) count = int(count) - return f"{count}{base}" + return f'{count}{base}' else: return base @@ -452,9 +455,9 @@ def month_anchor_check(dates): break if calendar_end: - return "ce" + return 'ce' elif calendar_start: - return "cs" + return 'cs' else: return None diff --git a/esmtools/utils.py b/esmtools/utils.py index 5d0cc8a..0db3b6a 100644 --- a/esmtools/utils.py +++ b/esmtools/utils.py @@ -3,15 +3,6 @@ from .checks import has_missing -def get_dims(da): - """ - Simple function to retrieve dimensions from a given dataset/datarray. - Currently returns as a list, but can add keyword to select tuple or - list if desired for any reason. - """ - return list(da.dims) - - def match_nans(x, y): """Performs pairwise matching of nans between ``x`` and ``y``. @@ -28,9 +19,9 @@ def match_nans(x, y): x, y = x.copy(), y.copy() idx = np.logical_or(np.isnan(x), np.isnan(y)) # NaNs cannot be added to `int` arrays. - if x.dtype == "int": - x = x.astype("float") - if y.dtype == "int": - y = y.astype("float") + if x.dtype == 'int': + x = x.astype('float') + if y.dtype == 'int': + y = y.astype('float') x[idx], y[idx] = np.nan, np.nan return x, y
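A minimal usage sketch of the compatible-emissions workflow introduced in this patch (not part of the diff itself). It assumes the caller already has `global_co2_flux` (PgC/yr) and `co2atm_forcing` (ppm) as xarray objects carrying 'member', 'initialization', and integer-year 'time' dimensions, which is the layout `plot_compatible_emissions` slices over; both names are placeholders, and `get_iam_emissions()` needs network access to the PIK server.

    import matplotlib.pyplot as plt

    from esmtools.carbon import (
        calculate_compatible_emissions,
        get_iam_emissions,
        plot_compatible_emissions,
    )

    # Diagnose emissions consistent with the prescribed atmospheric CO2 pathway
    # and the simulated global carbon flux (Jones et al., 2013).
    compatible_emissions = calculate_compatible_emissions(
        global_co2_flux, co2atm_forcing  # placeholder inputs, see note above
    )

    # Optional: overlay the emissions prescribed to the IAMs (downloaded from PIK).
    iam_emissions = get_iam_emissions()

    fig, ax = plt.subplots(figsize=(10, 6))
    plot_compatible_emissions(
        compatible_emissions, global_co2_flux, iam_emissions=iam_emissions, ax=ax
    )
    plt.show()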