# Tests for SMM versus CDO

This notebook contains a few tests to check whether the SMM approach is faster than the CDO one and whether it is reliable. The results below are encouraging, although they have been obtained only on 2D data so far.

In [1]:
from time import time
import timeit
import os
import numpy as np
import xarray as xr
from smmregrid import cdo_generate_weights, Regridder
from smmregrid.checker import check_cdo_regrid # this is a new function introduced to verify the output
from cdo import Cdo
cdo = Cdo()

# Location of the test data and the sample files to be regridded
indir = 'tests/data'
filelist = [
    'onlytos-ipsl.nc',
    'tas-ecearth.nc',
    '2t-era5.nc',
    'tos-fesom.nc',
]

# File passed as the target of every regridding call, stored alongside the inputs
tfile = os.path.join(indir, 'r360x180.nc')

# Remapping methods exercised by the checks below
methods = ['nn', 'con']

This cell verifies that the two approaches give the same regridded field, by comparing the output from CDO with the output obtained by SMM.
The files to be checked are listed in the cell above.

In [2]:

# Run the CDO-vs-SMM check for every (input file, remapping method) pair
for filein in filelist:
    source_path = os.path.join(indir, filein)  # same path for all methods
    for method in methods:
        outcome = check_cdo_regrid(source_path, tfile, method=method)
        print(f'{filein}: remap{method}->{outcome}')


['tos']
onlytos-ipsl.nc: remapnn->True
['tos']
onlytos-ipsl.nc: remapcon->True
['tas']
tas-ecearth.nc: remapnn->True
['tas']
tas-ecearth.nc: remapcon->True
['2t']
2t-era5.nc: remapnn->True
['2t']
2t-era5.nc: remapcon->True
['tos']
tos-fesom.nc: remapnn->True
['tos']
tos-fesom.nc: remapcon->True


Test the different ways of generating the weights with CDO, using conservative remapping. The climtas code is considerably more efficient if the files are already on disk, since the call to CDO has to be done from a file. The CDO bindings add a small overhead that has to be taken into account.

In [3]:
# number of repetitions for each timing
nr = 5

# The target grid is the same for every input file: open it once instead of
# once per iteration, and let the context manager close it at the end.
with xr.open_mfdataset(tfile) as tfield:

    for filein in filelist:

        # build the input path once and reuse it in all three timings
        infile = os.path.join(indir, filein)

        # generate weights from file
        one = timeit.timeit(lambda: cdo_generate_weights(infile, tfile, method = 'con'), number = nr)
        print(filein + ': Exectime climtas from file ' + str(one/nr))

        # generate weights from xarray; the source dataset is only needed for
        # this timing, so it is closed as soon as the measurement is done
        with xr.open_mfdataset(infile) as xfield:
            two = timeit.timeit(lambda: cdo_generate_weights(xfield, tfield, method = 'con'), number = nr)
        print(filein + ': Exectime climtas from xarray ' + str(two/nr))

        # generate weights with the CDO bindings (from file)
        three = timeit.timeit(lambda: cdo.gencon(tfile, input = infile, returnXDataset = True), number = nr)
        print(filein + ': Exectime cdo from file ' + str(three/nr))


onlytos-ipsl.nc: Exectime climtas from file 1.7797725904034452
onlytos-ipsl.nc: Exectime climtas from xarray 1.9583635907853023
onlytos-ipsl.nc: Exectime cdo from file 1.860802805610001
tas-ecearth.nc: Exectime climtas from file 0.958152796397917
tas-ecearth.nc: Exectime climtas from xarray 1.2780904907966033
tas-ecearth.nc: Exectime cdo from file 1.0990737908054142
2t-era5.nc: Exectime climtas from file 0.49766466959845274
2t-era5.nc: Exectime climtas from xarray 0.5890101308003068
2t-era5.nc: Exectime cdo from file 0.6149559417972341
tos-fesom.nc: Exectime climtas from file 3.5004765935940667
tos-fesom.nc: Exectime climtas from xarray 5.121664120396599
tos-fesom.nc: Exectime cdo from file 3.643564500613138


Test the full remap (generation of the weights + application) of CDO vs SMM. The results seem very much comparable!

In [4]:
# number of repetitions for each timing
nr = 5

def smm_remap(ifile, tfile):
    """Regrid `ifile` towards `tfile` with SMM, generating the weights on the fly.

    Conservative-remapping weights are generated with `cdo_generate_weights`
    and then applied to the dataset opened from `ifile` through a `Regridder`.

    Args:
        ifile: path of the file to be regridded.
        tfile: path of the file used as regridding target.

    Returns:
        The output of `Regridder.regrid` applied to the dataset opened from `ifile`.
    """

    xfield = xr.open_mfdataset(ifile)
    wfield = cdo_generate_weights(ifile, tfile, method = 'con')
    interpolator = Regridder(weights=wfield)
    # xfield is intentionally not closed here.
    # NOTE(review): the regridded output may still read lazily from it - confirm
    return interpolator.regrid(xfield)

for filein in filelist:

    # build the input path once and reuse it in both timings
    infile = os.path.join(indir, filein)

    # CDO: weights generation + remapping in a single operator call
    one = timeit.timeit(lambda: cdo.remapcon(tfile, input = infile, returnXDataset = True), number = nr)
    print(filein + ': Exectime CDO Weight+Remap ' + str(one/nr))

    # SMM: weights generation + remapping through smm_remap
    two = timeit.timeit(lambda: smm_remap(infile, tfile), number = nr)
    print(filein + ': Exectime SMM Weight+Remap ' + str(two/nr))


onlytos-ipsl.nc: Exectime CDO Weight+Remap 1.80319197261706
onlytos-ipsl.nc: Exectime SMM Weight+Remap 1.8579165945993736
tas-ecearth.nc: Exectime CDO Weight+Remap 1.104606278403662
tas-ecearth.nc: Exectime SMM Weight+Remap 1.0557268365984782
2t-era5.nc: Exectime CDO Weight+Remap 0.6187339887954295
2t-era5.nc: Exectime SMM Weight+Remap 0.557701671589166
tos-fesom.nc: Exectime CDO Weight+Remap 3.475422440189868
tos-fesom.nc: Exectime SMM Weight+Remap 3.6063115095952525


Test the computation of the remap when the weights are pre-computed. Since SMM does not have to write anything to disk, it is several times faster. However, once the masking was introduced, we lost almost a factor of two.

In [4]:
# number of repetitions for each timing
nr = 10

for filein in filelist:

    # build the input path once and reuse it for both CDO and SMM
    infile = os.path.join(indir, filein)

    # CDO: the weights are generated once, outside the timed call
    wfile = cdo.gencon(tfile, input = infile)
    one = timeit.timeit(lambda: cdo.remap(tfile + ',' + wfile, input = infile, returnXDataset = True), number = nr)
    print(filein + ': Exectime CDO Remap ' + str(one/nr))

    # SMM: weights and regridder are built once, outside the timed call
    wfield = cdo_generate_weights(infile, tfile, method = 'con')
    interpolator = Regridder(weights=wfield)

    # The regridded output is discarded after timing, so the source dataset
    # can be safely closed at the end of each iteration.
    with xr.open_mfdataset(infile) as xfield:
        # NOTE(review): if regrid returns a lazy (dask-backed) object, this
        # measures graph construction only and not the actual computation - confirm
        two = timeit.timeit(lambda: interpolator.regrid(xfield), number = nr)
    print(filein + ': Exectime SMM Remap ' + str(two/nr))

onlytos-ipsl.nc: Exectime CDO Remap 0.2543602315010503
onlytos-ipsl.nc: Exectime SMM Remap 0.05708544449880719
tas-ecearth.nc: Exectime CDO Remap 0.24985597220947967
tas-ecearth.nc: Exectime SMM Remap 0.06356588089838625
2t-era5.nc: Exectime CDO Remap 0.19433286940911784
2t-era5.nc: Exectime SMM Remap 0.03430949010653421
tos-fesom.nc: Exectime CDO Remap 0.45873337949160486
tos-fesom.nc: Exectime SMM Remap 0.06254725940525532
