In [7]:
import xarray as xr
import intake
import statsmodels.api as sm
import numpy as np
# Module-level alias for the statsmodels LOWESS smoother; loess_filter below calls it.
lowess = sm.nonparametric.lowess

In [4]:
# Sample CMIP6 data: search the NCI ESGF CMIP6 catalogue for a single
# model / experiment / table / variable / member combination.
cat = intake.cat.nci.esgf.cmip6.search(
    source_id='ACCESS-CM2',
    experiment_id='historical',
    table_id='day',
    variable_id='tas',
    member_id='r1i1p1f1',
)

# Open the matching datasets, chunked along time in blocks of 1000 steps.
dsd = cat.to_dataset_dict(cdf_kwargs=dict(chunks=dict(time=1000)))

# Take the first dataset returned.
# NOTE(review): presumably the search matches exactly one dataset - confirm.
ds = list(dsd.values())[0]
tas = ds['tas']


--> The keys in the returned dictionary of datasets are constructed as follows:
	'project.activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.version'


In [19]:
# Yearly means of the daily field.
tas_y = tas.resample(time='Y').mean()

# Rechunk so each block holds the full time axis (-1 = one chunk along time)
# and the grid is split into 12 blocks along lat and 16 along lon.
# Chunk sizes are derived from the actual dimension lengths with integer
# division, so they are always ints (true division would give floats such as
# 12.0) and the cell keeps working if the grid size changes.
tas_y = tas_y.chunk({
    'time': -1,
    'lat': tas_y.sizes['lat'] // 12,
    'lon': tas_y.sizes['lon'] // 16,
})

In [20]:
# Display the yearly-mean array to check its shape and chunk layout.
tas_y

Unnamed: 0,Array,Chunk
Bytes,17.40 MiB,92.81 kiB
Shape,"(165, 144, 192)","(165, 12, 12)"
Count,1293 Tasks,192 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.40 MiB 92.81 kiB Shape (165, 144, 192) (165, 12, 12) Count 1293 Tasks 192 Chunks Type float32 numpy.ndarray",192  144  165,

Unnamed: 0,Array,Chunk
Bytes,17.40 MiB,92.81 kiB
Shape,"(165, 144, 192)","(165, 12, 12)"
Count,1293 Tasks,192 Chunks
Type,float32,numpy.ndarray


In [22]:
def loess_filter(y: np.ndarray, step_size: int = 50) -> np.ndarray:
    
    '''
    Applies the loess filter to a 1D numpy array.
    
    Non-finite entries (NaN / inf) are left out of the fit. The remaining
    points are treated as equally spaced, smoothed, and written back to their
    original positions, so the result always has the same length as the input.
    This keeps `y - loess_filter(y)` well defined and lets the function be used
    with np.apply_along_axis even when slices contain different numbers of NaNs.
    
    Parameters
    -----------
    y: the 1D array of values to apply the loess filter to
    step_size: the number of points in each local regression window. The
    default is 50 points in each window. If fewer finite points than this are
    available, all of them are used as a single window.
    
    Returns
    -------
    yhat: float array with the same length as y holding the loess-smoothed
    values. Positions where y was not finite are NaN; if y has no finite
    values at all, every entry is NaN.
    
    Example
    -------
    >>> mean_temp = data.temp.values
    >>> mean_temp_loess = loess_filter(mean_temp)
    >>> 
    >>> # The mean temperature that has been detrended using the loess method.
    >>> mean_temp_loess_detrend = mean_temp - mean_temp_loess
    
    '''
    
    y = np.asarray(y, dtype=float)

    # Output starts as all-NaN; only the positions with valid input get filled.
    yhat = np.full(y.shape, np.nan)

    # Removing the NaNs (this is important: if two dataarrays were together in
    # a dataset, one might have been longer than the other, leaving a trail of
    # NaNs at the end).
    idy = np.isfinite(y)
    n_valid = int(idy.sum())

    # Nothing to fit; this also avoids dividing by zero below.
    if n_valid == 0:
        return yhat

    y_valid = y[idy]

    # The equally spaced x-values for the valid points.
    x = np.arange(n_valid)

    # The fraction of points used for each local linear fit. lowess only
    # accepts values in [0, 1], so cap it when the series is shorter than
    # step_size.
    frac = min(step_size / n_valid, 1.0)

    # Column 0 of the lowess result is x, column 1 is the smoothed y.
    smoothed = lowess(y_valid, x, frac=frac)

    yhat[idy] = smoothed[:, 1]

    return yhat

In [None]:
%%time
# Loess filter
loess = np.apply_along_axis(loess_filter, tas_y.get_axis_num('time'), tas_y.values)