Import section and define compute function

In [None]:
import xarray as xr
import numpy
import cupy
from contexttimer import Timer
from earthkit.data import from_source

def do_something(da):
    """Run four sample operations on ``da`` and print how long each one takes.

    The operations are: multiplying by 100, averaging over the ``number``
    dimension, averaging over the ``step`` dimension, and adding the array to
    itself. The results are discarded; only the timings are reported.

    ``da`` itself is never modified, so the function can be called repeatedly
    on the same array and later cells still see the original values.

    Parameters
    ----------
    da
        Array exposing ``shape``, ``mean(dim=...)`` and arithmetic operators,
        with ``number`` and ``step`` dimensions (expected to be an
        ``xarray.DataArray`` backed by numpy or cupy).

    Returns
    -------
    None
    """

    def _sync():
        # CuPy queues GPU work asynchronously; without waiting for the stream
        # the timer would mostly measure launch overhead, not the computation.
        backend = type(getattr(da, "data", da)).__module__.split(".")[0]
        if backend == "cupy":
            cupy.cuda.get_current_stream().synchronize()

    def _timed(label, operation):
        # Print the label, run the operation once and report the elapsed time.
        print(label)
        _sync()  # do not charge previously queued GPU work to this operation
        with Timer() as timer:
            operation()
            _sync()
        print(f"... took {timer.elapsed*1000:.2f} ms.")

    # Out-of-place multiply: an in-place ``da *= 100`` would rescale the
    # caller's data on every call.
    _timed(f"Multiplying array of shape {da.shape} by 100.", lambda: da * 100)
    print()

    _timed(
        f"Computing the ensemble mean of array of shape {da.shape}.",
        lambda: da.mean(dim="number"),
    )
    print()

    _timed(
        f"Computing the temporal mean of array of shape {da.shape}.",
        lambda: da.mean(dim="step"),
    )
    print()

    _timed(f"Adding array to array of shape {da.shape}.", lambda: da + da)

Create the Xarray Dataset from a FieldList object. At this point the data is NOT in memory: it is loaded lazily.

In [None]:
%%time
# Open the GRIB file as an earthkit FieldList and convert it to an xarray object.
fl = from_source("file", "~/data_ens.grib")
xa = fl.to_xarray()
# Bare expression so the notebook shows the resulting object.
xa

Let's use a custom order for the dimensions (a tensor object is used behind the scenes)

In [None]:
%%time
# Convert the same FieldList again, passing dims_order=['step', 'number'] to
# request a custom dimension order.
ds = fl.to_xarray(xarray_open_dataset_kwargs={"dims_order": ['step', 'number']})
# Print the earthkit metadata attached to the "2t" variable.
print(ds["2t"].earthkit.metadata)
# Bare expression so the notebook shows the dataset.
ds

Now we can load the dataset into memory with the standard xarray `load` function. We can see that the data array type is numpy by default.

In [None]:
%%time
# Load the "2t" variable into memory with xarray's load().
da = ds["2t"].load()
# Report the module that defines the underlying array type.
print(f"Array type is ««{type(da.data).__module__}»»")

Load a second time to show it's already there and that we actually did something in the previous step

In [None]:
# Repeat the load on the same variable and time it, for comparison with the first load.
%time da = ds["2t"].load()

Let's do the same, but with cupy as the array module

In [None]:
%%time
# Open the same file as a separate FieldList and convert it to xarray, this
# time passing cupy as the array module.
fl_other = from_source("file", "~/data_ens.grib")
ds_cp = fl_other.to_xarray(xarray_open_dataset_kwargs={"array_module": cupy})
# Bare expression so the notebook shows the dataset.
ds_cp

Again, we can load it in memory but this time we see that the data array is a cupy array, meaning the data is on the GPU

In [None]:
%%time
# Load the "2t" variable of the cupy-configured dataset.
da_cp = ds_cp["2t"].load()
# Report the module that defines the underlying array type.
print(f"Array type is ««{type(da_cp.data).__module__}»»")

Now let's compute something and compare timings, starting with the numpy Xarray object:

In [None]:
# Time the full set of sample operations on the numpy-backed array.
%time do_something(da)

And we compute the same thing with the cupy object

In [None]:
# Time the same operations on the array loaded with cupy as the array module.
%time do_something(da_cp)

Let's do it twice... :-O

In [None]:
# Second run on the same cupy-loaded array, to compare against the first run's timings.
%time do_something(da_cp)

In [None]:
# Average the numpy-backed array over the "step" dimension.
da_mean = da.mean(dim="step")
# Reuse the original array's metadata, overriding timeRangeIndicator and setting
# stepRange to "<first step>-<last step>" taken from the step coordinate.
da_mean.earthkit.metadata = da.earthkit.metadata.override(timeRangeIndicator=2, stepRange=f"{int(da.step[0])}-{int(da.step[-1])}")
# Convert the result to a FieldList through the earthkit accessor and preview it.
fl_mean = da_mean.earthkit.to_fieldlist()
fl_mean.head()

We can also save it to grib through the earthkit accessor

In [None]:
# Write the step-mean array to a GRIB file through the earthkit accessor.
da_mean.earthkit.to_grib("test_mean.grib")
# Read the file back (this rebinds fl_mean) and preview its contents.
fl_mean = from_source("file", "test_mean.grib")
fl_mean.head()

Now we can even use the xarray earthkit engine.
No need to import earthkit: the engine is registered when earthkit is installed.

In [None]:
%%time
# Open the GRIB file directly through xarray using the "earthkit" engine, with
# cupy passed as the array module.
# NOTE(review): this rebinds `ds`, which earlier cells use for the dataset opened
# without an explicit array module.
ds = xr.open_dataset("~/data_ens.grib", engine="earthkit", array_module=cupy)
print(ds)