# Tests for SMM (with dask) versus CDO

There are the same speed test but using dask. Surprisingly, the code is much slower. There should be something wrong

In [1]:
from time import time
import timeit
import os
import numpy as np
import xarray as xr
from smmregrid import cdo_generate_weights, Regridder
from smmregrid.checker import check_cdo_regrid # this is a new function introduced to verify the output
from cdo import Cdo
import pandas as pd
cdo = Cdo()

# where and which the data are
indir='tests/data'
filelist = ['onlytos-ipsl.nc','tas-ecearth.nc', '2t-era5.nc','tos-fesom.nc']
tfile = os.path.join(indir, 'r360x180.nc')

# method for remapping
methods = ['nn','con']
accesses = ['DataArray', 'Data']

from dask.distributed import LocalCluster, Client
cluster = LocalCluster(ip="0.0.0.0", threads_per_worker=1, n_workers=2)
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 44465 instead


# Remapping (with weights available)

This is the real goal of smmregrid. Here we test the computation of the remap when the weights are pre-computed. Considering that SMM does not have to write anything to disk, it is several times faster, between 5 to 10. Running with Dataset implies a bit of overhead (20%). Masks so far does not seem to be an issue.

In [2]:
# nrepetition for the check
nr = 10

data =[]
for filein in filelist: 

    # CDO
    wfile = cdo.gencon(tfile, input = os.path.join(indir,filein))
    one = timeit.timeit(lambda: cdo.remap(tfile + ',' + wfile, input = os.path.join(indir,filein), returnXDataset = True), number = nr)
    #print(filein + ': Exectime CDO Remap ' + str(one/nr))

    # SMM
    xfield = xr.open_mfdataset(os.path.join(indir,filein))
    wfield = cdo_generate_weights(os.path.join(indir,filein), tfile, method = 'con')
    interpolator = Regridder(weights=wfield)
    # var as the one which have time and not have bnds (could work)
    myvar = [var for var in xfield.data_vars 
             if 'time' in xfield[var].dims and 'bnds' not in xfield[var].dims]
    two = timeit.timeit(lambda: interpolator.regrid(xfield), number = nr)
    three = timeit.timeit(lambda: interpolator.regrid(xfield[myvar]), number = nr)
    four = timeit.timeit(lambda: interpolator.regrid(xfield[myvar], masked = False), number = nr)
    data.append([one, two, three, four])

    #print(filein + ': Exectime SMM Remap (DataSet) ' + str(two/nr))
    #print(filein + ': Exectime SMM Remap (DataArray) ' + str(three/nr))
    #print(filein + ': Exectime SMM Remap (DataSet+NoMask) ' + str(four/nr))

cnames = ['CDO', 'SMM (Dataset)', 'SMM (DataArray)', 'SMM (DataSet+NoMask)']
df = pd.DataFrame(data, index = filelist, columns = cnames)
df.div(df[cnames[0]],axis =0)

client.shutdown()


Unnamed: 0,CDO,SMM (Dataset),SMM (DataArray),SMM (DataSet+NoMask)
onlytos-ipsl.nc,1.0,0.726599,0.427539,0.427731
tas-ecearth.nc,1.0,0.902123,0.841263,0.869179
2t-era5.nc,1.0,0.673339,0.694263,0.64241
tos-fesom.nc,1.0,0.407764,0.405918,0.411269
