In [2]:
import numpy as np
from dask.diagnostics import ProgressBar
import xarray as xr
from scipy import stats
from scipy.stats import t
import statsmodels.api as sm

# Least-squares linear regression that accounts for autocorrelation in the time series or in its residuals

In [3]:
## Same least-squares regression as described above, but returning only the slope and its confidence interval (CI)

In [4]:
def autocorrelation(ds):
    """Linear trend of a series with an autocorrelation-adjusted confidence interval.

    An ordinary least-squares line is fitted to the finite entries of `ds`,
    using each entry's position (0, 1, 2, ...) as the x-coordinate, so the
    slope is expressed per index step. The lag-1 autocorrelation ``r1`` of the
    residuals is then used to shrink (or grow) the sample size to an
    effective value ``Ne = N * (1 - r1) / (1 + r1)``, which replaces ``N`` in
    both the slope standard error and the Student-t degrees of freedom.

    Parameters
    ----------
    ds : array_like
        Series of values; it is flattened to one dimension. Non-finite
        entries (NaN, +/-inf) are skipped.

    Returns
    -------
    numpy.ndarray of shape (2,)
        ``[slope, ci]`` where ``ci`` is the half-width of the two-sided 95 %
        confidence interval of the slope (t quantile 0.975 with ``Ne - 2``
        degrees of freedom).
        ``[nan, nan]`` is returned when fewer than 10 finite values are
        available or the fitted slope is not finite.
        ``[slope, nan]`` is returned when the interval is undefined: the
        residuals have zero variance, or ``Ne <= 2``.

    Notes
    -----
    The autocorrelation is taken over the residuals in their compacted order,
    i.e. the neighbours of a skipped (non-finite) entry are treated as
    adjacent samples.
    """
    values = np.ravel(np.asarray(ds))      # guarantee a 1-D vector
    x = np.flatnonzero(np.isfinite(values))  # positions of usable samples
    n_obs = x.size

    # Too few points for a meaningful trend estimate.
    if n_obs < 10:
        return np.array([np.nan, np.nan])

    y = values[x]
    result = stats.linregress(x, y)
    slope = result.slope

    if not (isinstance(slope, float) and np.isfinite(slope)):
        return np.array([np.nan, np.nan])

    resids = y - (x * slope + result.intercept)

    # Lag-1 autocorrelation of the residuals (biased estimator). This is the
    # same quantity as statsmodels' ``acf(resids, nlags=1)[1]`` up to
    # floating-point rounding, computed directly to avoid per-call overhead.
    centered = resids - np.mean(resids)
    variance_sum = np.sum(centered ** 2)
    if not variance_sum > 0:
        # Constant residuals: autocorrelation (0/0) is undefined.
        return np.array([slope, np.nan])
    r1 = np.sum(centered[:-1] * centered[1:]) / variance_sum

    # Effective number of independent samples.
    n_eff = n_obs * (1 - r1) / (1 + r1)
    if not n_eff > 2:
        # Ne - 2 <= 0 would put a non-positive value under the square root
        # and give the t distribution invalid degrees of freedom.
        return np.array([slope, np.nan])

    sxx = np.sum((x - np.mean(x)) ** 2)
    std_err_ne = np.sqrt(np.sum(resids ** 2) / ((n_eff - 2) * sxx))
    t_crit = stats.t.ppf(0.975, n_eff - 2)
    return np.array([slope, t_crit * std_err_ne])

In [5]:
# Input data: the `cafe_npp.nc` archive, available from
# https://zenodo.org/records/12586956 (or an equivalent copy).
# Point this at the local copy of the file.
cafe_npp_path = '/media/gsilsbe/nasa_npp/cafe/archival/cafe_npp.nc'

ds = xr.open_dataset(cafe_npp_path)
ds


In [None]:
def trend_stats(da):
    """Apply `autocorrelation` along ``time`` for every other coordinate of `da`.

    Parameters
    ----------
    da : xarray.DataArray
        Variable with a ``time`` dimension.

    Returns
    -------
    xarray.DataArray
        Same non-time dimensions as `da` plus a trailing ``stats`` dimension
        of length 2 holding the two values returned by `autocorrelation`
        (slope, then its confidence interval).
    """
    # NOTE(review): dask="parallelized" only matters when `da` is backed by
    # dask arrays; confirm the dataset is opened with chunks if parallel
    # execution is intended.
    return xr.apply_ufunc(
        autocorrelation,
        da,
        input_core_dims=[['time']],
        output_core_dims=[['stats']],
        vectorize=True,
        dask="parallelized",
        output_dtypes=['float64'],
        dask_gufunc_kwargs=dict(output_sizes={"stats": 2}),
    )


# Figure 1
npp_stats = trend_stats(ds.npp)

# Figure 2B
ap_stats = trend_stats(ds.ap)
