Import section and define compute function

In [1]:
import xarray as xr
import numpy
import cupy
from contexttimer import Timer
from earthkit.data import from_source, from_object

def _wait_for_gpu(da):
    """Block until queued GPU work has finished when ``da`` wraps a cupy array.

    CuPy launches GPU kernels asynchronously, so a wall-clock timer that is
    stopped right after a call returns would mostly measure launch overhead.
    Objects whose ``.data`` is not a ``cupy.ndarray`` are left alone.
    """
    if isinstance(getattr(da, "data", None), cupy.ndarray):
        cupy.cuda.get_current_stream().synchronize()


def do_something(da):
    """Time four array operations on ``da`` and print each duration in ms.

    The operations are: in-place multiplication by 100, mean over the
    ``"number"`` dimension, mean over the ``"step"`` dimension, and adding
    the array to itself. For cupy-backed data the current CUDA stream is
    synchronised inside every timed region so that the reported time covers
    the actual computation.

    Parameters
    ----------
    da
        Object exposing ``.shape``, in-place ``*=``, ``+`` and
        ``.mean(dim=...)`` with dimensions named ``"number"`` and ``"step"``.
        In this notebook it is the loaded ``"2t"`` variable.

    Returns
    -------
    None
        Results are discarded; only the timings are printed.

    Notes
    -----
    ``da *= 100`` is an augmented assignment on the argument itself, so the
    caller's data is scaled by 100 on every call.
    """
    print(f"Multiplying array of shape {da.shape} by 100.")
    with Timer() as timer:
        da *= 100
        _wait_for_gpu(da)
    print(f"... took {timer.elapsed*1000:.2f} ms.")

    print(f"Computing the ensemble mean of array of shape {da.shape}.")
    with Timer() as timer:
        da.mean(dim="number")
        _wait_for_gpu(da)
    print(f"... took {timer.elapsed*1000:.2f} ms.")

    print(f"Computing the temporal mean of array of shape {da.shape}.")
    with Timer() as timer:
        da.mean(dim="step")
        _wait_for_gpu(da)
    print(f"... took {timer.elapsed*1000:.2f} ms.")

    print(f"Adding array to array of shape {da.shape}.")
    with Timer() as timer:
        # The sum is only computed for timing; it is not kept.
        da + da
        _wait_for_gpu(da)
    print(f"... took {timer.elapsed*1000:.2f} ms.")

Dataset is lazy-loaded.

In [2]:
%%time
# Open the GRIB file through earthkit-data's `from_source` and convert the
# result to an xarray object.
# NOTE(review): the accompanying text says the dataset is lazy-loaded, so
# presumably no field values are read in this cell — confirm.
fl = from_source("file", "~/data_ens.grib")
xa = fl.to_xarray()
# Bare last expression so the notebook displays the object's repr.
xa

<module 'numpy' from '/perm/macw/conda/envs/earthkit/lib/python3.10/site-packages/numpy/__init__.py'>


                                                                                                  

CPU times: user 647 ms, sys: 2.05 s, total: 2.7 s
Wall time: 2.83 s


In [3]:
# NOTE(review): the recorded output of this cell ends with
# "ValueError: Metadata object not found in variable. Required for conversion
# to field list!" — the call fails as written and the cells after it show no
# execution count. Confirm the intended argument/usage before publishing.
xa.to_fieldlist(None)

2t
number
step
values
[]
[]
<xarray.DataArray '2t' (number: 20, step: 3, values: 6599680)>
[395980800 values with dtype=<class 'numpy.float32'>]
Coordinates:
  * number   (number) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
  * step     (step) int64 24 48 72
  * values   (values) int64 0 1 2 3 4 ... 6599676 6599677 6599678 6599680
Attributes:
    metadata:  <earthkit.data.readers.grib.metadata.GribMetadata object at 0x...
{'number': <xarray.DataArray 'number' (number: 20)>
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
       19, 20])
Coordinates:
  * number   (number) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20, 'step': <xarray.DataArray 'step' (step: 3)>
array([24, 48, 72])
Coordinates:
  * step     (step) int64 24 48 72}
('number',)
('step',)
<xarray.DataArray 'number' (number: 20)>
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
       19, 20])
Coordinates:
  * number   (number) int64 1 2 3 4 5

ValueError: Metadata object not found in variable. Required for conversion to field list!

In [None]:
# Select the "2t" variable, replace its "metadata" attribute with the plain
# string "meta", and write the result to tmp.nc in the working directory.
# NOTE(review): presumably the original metadata object cannot be serialised
# by to_netcdf, hence the placeholder — confirm.
da = xa["2t"]
da = da.assign_attrs({"metadata": "meta"})
da.to_netcdf("tmp.nc")

In [None]:
# Read tmp.nc back through earthkit-data's `from_source` and call `ls()` on
# the result to inspect what was read.
new_fl = from_source("file", "tmp.nc")
new_fl.ls()

Let's use a custom order for the dimensions

In [None]:
%%time
# Convert the same `fl` again, this time passing dims_order with 'step'
# listed before 'number'.
# Note: this rebinds `xa`, replacing the object created by the earlier
# to_xarray() call.
xa = fl.to_xarray(xarray_open_dataset_kwargs={"dims_order": ['step', 'number']})
xa

Now we can load the dataset into memory with the standard xarray `load` method.

In [None]:
%%time
# Load the "2t" variable with xarray's load() and print the module that
# defines the type of the underlying array (`var.data`), which tells us which
# array library backs it.
var = xa["2t"].load()
print(f"Array type is ««{type(var.data).__module__}»»")

Load a second time to show that the data is already in memory.

In [None]:
# Same load() call as in the previous cell, timed again for comparison.
%time var = xa["2t"].load()

Now we try with cupy

In [None]:
%%time
# Open the same GRIB file a second time and convert it to xarray, passing the
# cupy module as `array_module` so the arrays are requested as cupy arrays.
fl_other = from_source("file", "~/data_ens.grib")
xa_cp = fl_other.to_xarray(xarray_open_dataset_kwargs={"array_module": cupy})
# Bare last expression so the notebook displays the object's repr.
xa_cp

We load the cupy-backed variable.

In [None]:
%%time
# Load the "2t" variable of the cupy-configured object and print the module
# that defines the type of the underlying array (`var_cp.data`).
var_cp = xa_cp["2t"].load()
print(f"Array type is ««{type(var_cp.data).__module__}»»")

Now let's compute something using the numpy based xarray

In [None]:
# Run the benchmark on `var` (loaded without array_module=cupy).
# Note: do_something multiplies its argument in place, so `var` holds values
# scaled by 100 after this cell.
%time do_something(var)

And now the same thing with the cupy object

In [None]:
# Run the same benchmark on `var_cp` (loaded with array_module=cupy).
# Note: do_something multiplies its argument in place, so `var_cp` holds
# values scaled by 100 after this cell.
%time do_something(var_cp)

Let's do it twice... :O

In [None]:
# Repeat the benchmark on `var_cp` to compare against the first run.
# Note: every call scales `var_cp` in place by another factor of 100, so this
# run operates on different values than the previous one.
%time do_something(var_cp)

Now we can even use the xarray earthkit engine.
There is no need to import earthkit explicitly: the engine is registered when earthkit is installed.

In [None]:
%%time
# Open the GRIB file directly with xarray, selecting the "earthkit" engine and
# passing the cupy module as `array_module`.
ds = xr.open_dataset("~/data_ens.grib", engine="earthkit", array_module=cupy)
print(ds)