## Interpolation of node values to cell centers

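xarray has routines for interpolating data (e.g. `DataArray.interp`). This notebook uses them to interpolate node-centered values to cell centers and compares two approaches: sub-selecting the requested index range before interpolating, versus interpolating the full array directly at the requested cell centers.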

In [9]:
from yt_xarray.sample_data import load_random_xr_data
import numpy as np 
import scipy
from dask import array as da
import xarray as xr

In [8]:


# Two 3D fields defined on a shared (x, y, z) grid. Each dims entry is a
# 3-tuple handed to load_random_xr_data -- presumably (start, stop, n_points),
# which is consistent with the (15, 10, 15) shape displayed further down.
fields = dict.fromkeys(('temperature', 'pressure'), ('x', 'y', 'z'))
dims = dict(x=(0, 1, 15), y=(0, 1, 10), z=(0, 1, 15))
ds_xr = load_random_xr_data(fields, dims, length_unit='m')
ds_xr

In [2]:
# peek at the first four x coordinate values of the temperature field
ds_xr.temperature.x.isel(x=slice(0, 4))

In [21]:
def select_yt_cell_centers(ds_xr, si, ei, field, 
                           method="select_then_interp",                          ):
    # si, ei : cell center si, ei

    xr_field = getattr(ds_xr, field)
    ei_node = ei + np.array([1,1,1])

    i_select_dict = {}    
    for idim, dim in enumerate(("x", "y", "z")):
        i_select_dict[dim] = slice(si[idim], ei_node[idim])        

    if method == "select_then_interp":
        # option 1: subselect then interpolation : dask safe!
        # but if not using linear interpolation, would need to
        # consider ghost nodes to avoid boundary effects between
        # chunks.
        data_subselection = xr_field.isel(i_select_dict).load()
        interp_dict = {}
        for idim, dim in enumerate(("x", "y", "z")):
            dim_vals = getattr(data_subselection, dim).values
            interp_dict[dim] = (dim_vals[1:] + dim_vals[:-1])/2.

        return data_subselection.interp(interp_dict).values
    
    elif method == "direct_interp":
        # option 2: interpolate without subselecting first : this
        # likely wont scale super well for memory use, but would
        # be po
        interp_dict = {}
        for idim, dim in enumerate(("x", "y", "z")):
            dim_vals = getattr(xr_field, dim).isel({dim:i_select_dict[dim]}).values
            interp_dict[dim] = (dim_vals[1:] + dim_vals[:-1])/2.
            
        return xr_field.interp(interp_dict).values     
    

whole-grid selection:

In [4]:
# number of points along (x, y, z) for the temperature field
ds_xr.temperature.shape

(15, 10, 15)

In [5]:
# whole-grid index range: with ei = shape - 1 the function uses node slices
# 0:n along each dimension
si = np.array([0, 0, 0])
ei = np.array(ds_xr.temperature.shape) - 1
fld = "temperature"
# result names follow the method used:
#   data_i        -> direct interpolation
#   data_s_then_i -> select, then interpolate
data_i = select_yt_cell_centers(ds_xr, si, ei, fld, method="direct_interp")
data_s_then_i = select_yt_cell_centers(ds_xr, si, ei, fld, method="select_then_interp")

In [6]:
# Both calls interpolate the same field at the same cell centers, so on the
# whole grid the results should agree in value as well as in shape.
np.testing.assert_allclose(data_s_then_i, data_i)
data_s_then_i.shape, data_i.shape

((14, 9, 14), (14, 9, 14))

In [61]:
%%timeit
# time direct interpolation on the existing dataset; the method name is
# passed through `meth` so the label and the call cannot drift apart
meth = "direct_interp"
data = select_yt_cell_centers(ds_xr, si, ei, fld, method=meth)
data.shape

4.02 ms ± 84.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [62]:
%%timeit
# time select-then-interpolate on the existing dataset; the method name is
# passed through `meth` so the label and the call cannot drift apart
meth = "select_then_interp"
data = select_yt_cell_centers(ds_xr, si, ei, fld, method=meth)
data.shape

3.76 ms ± 44.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [63]:
def build_and_select(nx, ny, nz, method):
    """Build a random dataset and sample its temperature over the whole grid.

    A fresh dataset with 'temperature' and 'pressure' fields is created via
    load_random_xr_data on every call, using (0, 1, n) along each of x, y, z.
    The temperature field is then passed to select_yt_cell_centers with an
    index range spanning the full grid.

    Parameters
    ----------
    nx, ny, nz : int
        Third entry of the per-dimension tuples given to load_random_xr_data.
    method : str
        Forwarded to select_yt_cell_centers.

    Returns
    -------
    The array returned by select_yt_cell_centers.
    """
    axes = ('x', 'y', 'z')
    field_spec = dict.fromkeys(('temperature', 'pressure'), axes)
    grid_spec = {ax: (0, 1, n) for ax, n in zip(axes, (nx, ny, nz))}
    ds = load_random_xr_data(field_spec, grid_spec, length_unit='m')

    # index range covering the whole grid
    start = np.array([0, 0, 0])
    stop = np.array(ds.temperature.shape) - 1

    return select_yt_cell_centers(ds, start, stop, 'temperature', method=method)

In [64]:
# 100 x 100 x 100 random grid, sampled by sub-selecting and then interpolating
data_s_then_i = build_and_select(100, 100, 100, "select_then_interp")

In [65]:
# 100 x 100 x 100 random grid, sampled by interpolating the full field directly
data_i = build_and_select(100, 100, 100, "direct_interp")

In [66]:
%%timeit
# NOTE: build_and_select creates the random dataset on every call, so this
# timing includes dataset construction as well as the interpolation.
data_i = build_and_select(100, 100, 100, "direct_interp")

40.2 ms ± 1.57 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [67]:
%%timeit
# NOTE: build_and_select creates the random dataset on every call, so this
# timing includes dataset construction as well as the interpolation.
data_s_then_i = build_and_select(100, 100, 100, "select_then_interp")

41.3 ms ± 1.06 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [68]:
%%timeit
# NOTE: the timing includes building the 300 x 300 x 300 random dataset,
# since build_and_select creates it on every call.
data_i = build_and_select(300, 300, 300, "direct_interp")

1.07 s ± 20.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [69]:
%%timeit
# NOTE: the timing includes building the 300 x 300 x 300 random dataset,
# since build_and_select creates it on every call.
data_s_then_i = build_and_select(300, 300, 300, "select_then_interp")

1.12 s ± 32.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [70]:
%%timeit
# NOTE: the timing includes building the 500 x 400 x 600 random dataset,
# since build_and_select creates it on every call.
data_i = build_and_select(500, 400, 600, "direct_interp")

5.4 s ± 140 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [71]:
%%timeit
# NOTE: the timing includes building the 500 x 400 x 600 random dataset,
# since build_and_select creates it on every call.
data_s_then_i = build_and_select(500, 400, 600, "select_then_interp")

5.41 s ± 89.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [52]:
from dask import array as da


In [28]:
# Dask-backed random field on a 500 x 400 x 600 grid, chunked into blocks of
# 50 points per side, wrapped in an xarray Dataset as "temperature".
nx, ny, nz = 500, 400, 600
chunks = 50
field_dims = ("x", "y", "z")
coords = {
    dim: np.linspace(0, 1, n_points)
    for dim, n_points in zip(field_dims, (nx, ny, nz))
}
data_vals = da.random.random((nx, ny, nz), chunks=chunks)
data = xr.DataArray(data_vals, coords=coords, dims=field_dims)

xr_ds = xr.Dataset({"temperature": data})

In [31]:
# index-based sub-selection of the dask-backed field; the repr below shows
# the selection is still a dask array
T = xr_ds.temperature.isel(x=slice(0, 20), y=slice(0, 21), z=slice(0, 19))
T

Unnamed: 0,Array,Chunk
Bytes,62.34 kiB,62.34 kiB
Shape,"(20, 21, 19)","(20, 21, 19)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 62.34 kiB 62.34 kiB Shape (20, 21, 19) (20, 21, 19) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",19  21  20,

Unnamed: 0,Array,Chunk
Bytes,62.34 kiB,62.34 kiB
Shape,"(20, 21, 19)","(20, 21, 19)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [32]:
# Pull the sub-selection into memory. NOTE: xarray's .load() loads in place
# and returns the same object, so T itself is in memory after this call too.
T_in_mem = T.load()

In [33]:
# sample a small index range of the dask-backed dataset by sub-selecting
# first; the result name matches the method (select, then interpolate)
si = np.array([0, 0, 0])
ei = np.array([11,12,13])
fld = "temperature"
data_s_then_i = select_yt_cell_centers(xr_ds, si, ei, fld, method="select_then_interp")
data_s_then_i.shape

(11, 12, 13)

In [34]:
# direct interpolation over the same index range; the result name matches the
# method used.
# NOTE(review): this samples ds_xr, not the dask-backed xr_ds used with
# "select_then_interp" elsewhere in this notebook -- confirm that is intended.
data_i = select_yt_cell_centers(ds_xr, si, ei, fld, method="direct_interp")
data_i.shape

(11, 12, 13)

In [35]:

# Rebuild ds_xr on a larger grid (95, 42, 35 along x, y, z) and take an
# index-based sub-selection of its temperature field.
fields = dict.fromkeys(('temperature', 'pressure'), ('x', 'y', 'z'))
dims = dict(x=(0, 1, 95), y=(0, 1, 42), z=(0, 1, 35))
ds_xr = load_random_xr_data(fields, dims, length_unit='m')
T = ds_xr.temperature.isel(x=slice(0, 20), y=slice(0, 21), z=slice(0, 19))

In [75]:
# index range spanning the whole of ds_xr.temperature
si = np.array([0, 0, 0])
ei = np.array(ds_xr.temperature.shape) - 1
fld = "temperature"


In [76]:
# NOTE(review): x is not used by the call below.
x = [1,2,3,4]
# Interpolating the dask-backed array along all three dimensions at once;
# in this run it raised the NotImplementedError shown in the cell output.
xr_ds.temperature.interp({"x": coords['x'][0:5], 
                          "y": coords['y'][0:5], 
                          "z": coords['z'][0:5], })

NotImplementedError: Don't yet support nd fancy indexing

In [79]:
# check the container type of the field that failed to interpolate
type(xr_ds.temperature)

xarray.core.dataarray.DataArray

In [82]:
# inspect the underlying array; the repr below shows a chunked dask array
xr_ds.temperature.data

Unnamed: 0,Array,Chunk
Bytes,915.53 MiB,0.95 MiB
Shape,"(500, 400, 600)","(50, 50, 50)"
Dask graph,960 chunks in 1 graph layer,960 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 915.53 MiB 0.95 MiB Shape (500, 400, 600) (50, 50, 50) Dask graph 960 chunks in 1 graph layer Data type float64 numpy.ndarray",600  400  500,

Unnamed: 0,Array,Chunk
Bytes,915.53 MiB,0.95 MiB
Shape,"(500, 400, 600)","(50, 50, 50)"
Dask graph,960 chunks in 1 graph layer,960 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
