# Speed tests for smmregrid (without dask) versus CDO

These tests check whether the SMM approach is faster than the CDO one and whether it is reliable in terms of output. They cover both 2D and 3D variables, accessed as both DataArrays and Datasets.
More complex oceanic 3D data structures are not yet considered.

In [1]:
import timeit
import os
import xarray as xr
from smmregrid import cdo_generate_weights, Regridder
from cdo import Cdo
import pandas as pd
import copy
# single CDO bindings object, reused by all the timing cells below
cdo = Cdo()
import dask
# select dask's "synchronous" scheduler for the whole session: as the title
# says, the comparison is meant to be run without dask
dask.config.set(scheduler="synchronous")

# --- input data --------------------------------------------------------------
# folder with the sample files, and the files included in the benchmark
indir = 'tests/data'
filelist = [
    'tas-healpix2.nc',
    'onlytos-ipsl.nc',
    'tas-ecearth.nc',
    '2t-era5.nc',
    'tos-fesom.nc',
    'ua-ecearth.nc',
    'mix-cesm.nc',
    # 'era5-mon.nc',   # not available on github
    # 'lsm-ifs.grb',
]
# shorter selections for quick runs:
# filelist = ['tos-fesom.nc', 'onlytos-ipsl.nc', 'tas-ecearth.nc']
# filelist = ['tas-ecearth.nc']

# file handed to the remapping calls as target
tfile = os.path.join(indir, 'r360x180.nc')

# --- remapping options -------------------------------------------------------
methods = ['nn', 'con', 'bil']
# methods = ['con']
accesses = ['Dataset', 'DataArray']

# --- per-file configuration --------------------------------------------------
# Every file starts from its own deep copy of the defaults, so that trimming
# the lists of one file never affects another one.
defdict = {'methods': methods, 'accesses': accesses, 'extra': '', 'chunks': None}
base = dict((fname, copy.deepcopy(defdict)) for fname in filelist)

# remapping methods removed for specific files (cases where we know CDO
# does not work)
_dropped_methods = {
    'tos-fesom.nc': ['bil'],
    'tas-healpix2.nc': ['bil'],
    'lsm-ifs.grb': ['bil', 'con'],
}
for _fname, _to_drop in _dropped_methods.items():
    if _fname in filelist:
        for _method in _to_drop:
            base[_fname]['methods'].remove(_method)

# the GRIB file additionally gets an extra CDO operator string
if 'lsm-ifs.grb' in filelist:
    base['lsm-ifs.grb']['extra'] = '-setgridtype,regular'

# this file is only accessed as a Dataset
if 'mix-cesm.nc' in filelist:
    base['mix-cesm.nc']['accesses'].remove('DataArray')

# files with an explicit chunking
_chunking = {
    'era5-mon.nc': {'time': 12},
    'ua-ecearth.nc': {'plev': 3},
}
for _fname, _chunks in _chunking.items():
    if _fname in filelist:
        base[_fname]['chunks'] = _chunks

## Full remapping 

Test the full remap (generation of the weights + application) of CDO vs SMM, using conservative remapping. The results seem very much comparable!

In [2]:
# number of repetitions passed to every timeit call
nr = 10

# The GRIB sample is left out of the timings. Giving pop() a default keeps
# this cell re-runnable: the previous guard tested `filelist`, which never
# changes, so a second execution tried to pop an already removed key and
# raised KeyError.
base.pop('lsm-ifs.grb', None)

def smm_remap(ifile, tfile):
    """Run the whole smmregrid interpolation of one file in a single call.

    The source file is opened with ``xr.open_mfdataset``, conservative
    ('con') weights towards ``tfile`` are generated with
    ``cdo_generate_weights`` and a ``Regridder`` built on those weights is
    applied to the opened dataset.

    Args:
        ifile: path of the file to be regridded.
        tfile: path of the file passed to ``cdo_generate_weights`` as target.

    Returns:
        Whatever ``Regridder.regrid`` returns for the opened dataset.
    """
    source = xr.open_mfdataset(ifile)
    weights = cdo_generate_weights(ifile, tfile, method='con')
    return Regridder(weights=weights).regrid(source)

# Time the complete remapping (weights + application) of every file, first
# through the CDO bindings and then through smm_remap(). Each entry is the
# total time of `nr` executions.
data = []
for filein in base:
    cdo_total = timeit.timeit(
        lambda: cdo.remapcon(
            tfile,
            input=os.path.join(indir, filein),
            returnXDataset=True,
        ),
        number=nr,
    )
    smm_total = timeit.timeit(
        lambda: smm_remap(os.path.join(indir, filein), tfile),
        number=nr,
    )
    data.append([cdo_total, smm_total])

# one row per file, one column per approach
cnames = ['CDO (Weight+Remap)', 'SMM (Weight+Remap)']
df = pd.DataFrame(data, index=list(base), columns=cnames)
# show the timings relative to the CDO column (CDO == 1.0)
df.div(df[cnames[0]], axis=0)


Unnamed: 0,CDO (Weight+Remap),SMM (Weight+Remap)
tas-healpix2.nc,1.0,0.801709
onlytos-ipsl.nc,1.0,0.870045
tas-ecearth.nc,1.0,0.753496
2t-era5.nc,1.0,0.626265
tos-fesom.nc,1.0,0.92433
ua-ecearth.nc,1.0,0.719938
mix-cesm.nc,1.0,0.665692


## Remapping (with weights available)

This is the real goal of smmregrid. Here we test the computation of the remap when the weights are pre-computed, still using conservative remapping. Since SMM does not have to write anything to disk, it is a few times faster. Running with a Dataset implies a bit of overhead (20%). Masks have been integrated and create a small overhead when needed. Of course, loading the files into memory implies a considerable slowdown.

In [3]:
# Remap-only benchmark: the weights of each file are generated once, outside
# the timed calls, so that only their application is measured. Each timing
# is the total of `nr` executions.
nr = 10

data = []
for filein in base:

    # CDO: gencon returns the weights file, which remap receives together
    # with the target file; timed with and without loading the result
    wfile = cdo.gencon(tfile, input=os.path.join(indir, filein))
    cdo_times = [
        timeit.timeit(
            lambda: cdo.remap(
                tfile + ',' + wfile,
                input=os.path.join(indir, filein),
                returnXDataset=True,
            ).load(),
            number=nr,
        ),
        timeit.timeit(
            lambda: cdo.remap(
                tfile + ',' + wfile,
                input=os.path.join(indir, filein),
                returnXDataset=True,
            ),
            number=nr,
        ),
    ]

    # SMM: field and weights are loaded up front, then the regridder is built
    xfield = xr.open_mfdataset(os.path.join(indir, filein)).load()
    wfield = cdo_generate_weights(os.path.join(indir, filein), tfile, method='con').load()
    interpolator = Regridder(weights=wfield)

    # variables with a 'time' dimension and without a 'bnds' one; the first
    # of them is the one used for the DataArray timings
    myvar = []
    for vname in xfield.data_vars:
        vdims = xfield[vname].dims
        if 'time' in vdims and 'bnds' not in vdims:
            myvar.append(vname)

    # descriptive columns of the table
    nrecords = xfield[myvar[0]].shape
    nvars = len(myvar)

    # in-memory regridding: whole Dataset, single DataArray, and single
    # DataArray without the final load()
    smm_times = [
        timeit.timeit(lambda: interpolator.regrid(xfield).load(), number=nr),
        timeit.timeit(lambda: interpolator.regrid(xfield[myvar[0]]).load(), number=nr),
        timeit.timeit(lambda: interpolator.regrid(xfield[myvar[0]]), number=nr),
    ]

    # regridding followed by a write to disk; the scratch file is deleted
    # right after each measurement
    smm_times.append(
        timeit.timeit(lambda: interpolator.regrid(xfield).to_netcdf('test.nc'), number=nr)
    )
    if os.path.isfile('test.nc'):
        os.remove('test.nc')
    smm_times.append(
        timeit.timeit(lambda: interpolator.regrid(xfield[myvar[0]]).to_netcdf('test2.nc'), number=nr)
    )
    if os.path.isfile('test2.nc'):
        os.remove('test2.nc')

    data.append([nvars, nrecords] + cdo_times + smm_times)


cnames = ['NVars', 'NRecords', 'CDO', 'CDO (NoLoad)',
          'SMM (Dataset)', 'SMM (DataArray)', 'SMM (DataArray+NoLoad)',
          'SMM (Dataset+Write)', 'SMM (DataArray+Write)']
df = pd.DataFrame(data, index=list(base), columns=cnames)

# the two descriptive columns are kept as they are, every timing column is
# divided by the 'CDO' one (CDO == 1.0)
info = df.iloc[:, 0:2]
ratios = df.iloc[:, 2:].div(df[cnames[2]], axis=0)
final = pd.concat([info, ratios], join='outer', axis=1)
final


Unnamed: 0,NVars,NRecords,CDO,CDO (NoLoad),SMM (Dataset),SMM (DataArray),SMM (DataArray+NoLoad),SMM (Dataset+Write),SMM (DataArray+Write)
tas-healpix2.nc,1,"(12, 12288)",1.0,0.965832,0.12865,0.103219,0.021833,0.115453,0.122626
onlytos-ipsl.nc,1,"(12, 332, 362)",1.0,0.98056,0.133137,0.133919,0.041179,0.144053,0.142921
tas-ecearth.nc,1,"(12, 256, 512)",1.0,0.993987,0.144356,0.145,0.037764,0.156681,0.155994
2t-era5.nc,1,"(12, 73, 144)",1.0,1.102008,0.087067,0.079244,0.022769,0.105055,0.092556
tos-fesom.nc,1,"(12, 126859)",1.0,0.981255,0.14175,0.13551,0.027498,0.144329,0.1434
ua-ecearth.nc,1,"(2, 19, 256, 512)",1.0,0.986048,0.23649,0.233315,0.068496,0.251854,0.271568
mix-cesm.nc,4,"(12, 192, 288)",1.0,0.921705,0.223586,0.077339,0.02113,0.254226,0.089087


## Weight generation

As a final note, we test the different weight-generation possibilities with CDO, using conservative remapping: the climtas code is much more efficient if the files are already on disk, since the call to CDO has to be done from file. The CDO bindings add a small overhead that should be taken into account.

In [4]:
# number of repetitions passed to every timeit call
nr = 10

# Time the three ways of generating conservative weights for every file.
# Each entry is the total time of `nr` executions.
data = []

# The target dataset is the same for every source file: it is opened once
# instead of once per iteration, and closed by the context manager.
with xr.open_mfdataset(tfile) as tfield:
    for filein in base:
        # built once per file, so that the timed calls below contain only
        # the weight generation itself
        ifile = os.path.join(indir, filein)

        # the opened source is only needed by the "from xarray" timing; it is
        # closed at the end of the iteration so that open files do not
        # accumulate across the loop
        with xr.open_mfdataset(ifile) as xfield:
            # weights from the file paths
            from_file = timeit.timeit(
                lambda: cdo_generate_weights(ifile, tfile, method='con'),
                number=nr,
            )
            # weights from the already opened xarray objects
            from_xarray = timeit.timeit(
                lambda: cdo_generate_weights(xfield, tfield, method='con'),
                number=nr,
            )
            # weights through the CDO bindings (from file)
            bindings = timeit.timeit(
                lambda: cdo.gencon(tfile, input=ifile, returnXDataset=True),
                number=nr,
            )

        data.append([bindings, from_file, from_xarray])

cnames = ['CDO bindings', 'CDO subprocess (from file)', 'CDO subprocess (from xarray)']
df = pd.DataFrame(data, index=list(base), columns=cnames)
# show the timings relative to the CDO bindings column (== 1.0)
df.div(df[cnames[0]], axis=0)


Unnamed: 0,CDO bindings,CDO subprocess (from file),CDO subprocess (from xarray)
tas-healpix2.nc,1.0,0.694908,0.723904
onlytos-ipsl.nc,1.0,0.837255,0.879399
tas-ecearth.nc,1.0,0.709471,0.892979
2t-era5.nc,1.0,0.602283,0.658382
tos-fesom.nc,1.0,0.902827,1.253558
ua-ecearth.nc,1.0,0.702836,0.805423
mix-cesm.nc,1.0,0.646993,0.737864
