## GRIB: selection using metadata

In [1]:
import earthkit.data as ekd
# Download the example GRIB file that the next cell opens by name.
ekd.download_example_file("tuv_pl.grib")

In [2]:
# Open the GRIB file; ds is the input of the selections in the cells below.
ds = ekd.from_source("file", "tuv_pl.grib")

In [3]:
# Number of fields in ds.
len(ds)

18

### Using sel

In [4]:
# Keep only the fields whose "level" metadata equals 500.
a = ds.sel(level=500)
# List the selected fields, one row per field.
a.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll
1,ecmf,u,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll
2,ecmf,v,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


In [5]:
# Check the type of the object returned by sel().
type(a)

earthkit.data.readers.grib.index.GribMaskFieldList

We can use a dict instead of keyword arguments:

In [6]:
# The conditions in the dict are combined: only the field with level 500
# and shortName "v" is kept.
a = ds.sel({"level": 500, "shortName": "v"})
a.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,v,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


Lists are accepted:

In [7]:
# A list matches any of its values: fields on level 500 or 850 are selected.
a = ds.sel(level=[500, 850])
a.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
1,ecmf,u,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
2,ecmf,v,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
3,ecmf,t,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll
4,ecmf,u,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll
5,ecmf,v,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


Slices define closed intervals: both the start and the stop values are included, unlike in normal Python indexing:

In [8]:
# slice(500, 850) keeps the "t" fields with 500 <= level <= 850;
# both end values are included in the result.
a = ds.sel(param="t", level=slice(500, 850))
a.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
1,ecmf,t,isobaricInhPa,700,20180801,1200,0,an,0,regular_ll
2,ecmf,t,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


### Using isel

In [9]:
# isel() selects by position within the values of a key: index 0 of "level"
# is 300 (see the ds.index("level") output further down).
a = ds.isel(level=0)
a.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,300,20180801,1200,0,an,0,regular_ll
1,ecmf,u,isobaricInhPa,300,20180801,1200,0,an,0,regular_ll
2,ecmf,v,isobaricInhPa,300,20180801,1200,0,an,0,regular_ll


In [10]:
# Index 2 of "level" is 500 and index 1 of "shortName" is "u".
a = ds.isel({"level": 2, "shortName": 1})
a.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,u,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


In [11]:
# A list of indices is accepted: level indices 2 and 3 (500 and 700),
# param index 0 ("t").
a = ds.isel(level=[2,3], param=0)
a.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,700,20180801,1200,0,an,0,regular_ll
1,ecmf,t,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


Slices work as in normal Python indexing, i.e. the stop index is excluded:

In [12]:
# slice(2, 5) covers level indices 2, 3 and 4 (500, 700 and 850);
# index 5 (1000) is excluded.
a = ds.isel(level=slice(2,5), param=0)
a.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
1,ecmf,t,isobaricInhPa,700,20180801,1200,0,an,0,regular_ll
2,ecmf,t,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


### Using order_by

In [13]:
# order_by() can be called without any sorting keys; check the type of the result.
b = a.order_by()
type(b)

earthkit.data.readers.grib.index.GribMaskFieldList

In [14]:
# List the fields of the object returned by order_by().
b.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
1,ecmf,t,isobaricInhPa,700,20180801,1200,0,an,0,regular_ll
2,ecmf,t,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


The sorting keys can be specified as a list:

In [15]:
# a holds only "t" fields (it comes from the last isel cell above), so sorting
# by shortName leaves the listing unchanged.
b = a.order_by(["shortName"])
b.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
1,ecmf,t,isobaricInhPa,700,20180801,1200,0,an,0,regular_ll
2,ecmf,t,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


We can prescribe the actual order of the values within a key. This only works when all the possible values of the key are specified:

In [16]:
# Prescribe the order of the shortName values explicitly: "v", then "t", then "u".
# a holds only "t" fields here (it comes from the last isel cell above), so the
# listing is the same as before. The result is bound to b, as in the previous
# order_by cells, so that a is not overwritten by its own sorted copy.
b = a.order_by(shortName=["v", "t", "u"])
b.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
1,ecmf,t,isobaricInhPa,700,20180801,1200,0,an,0,regular_ll
2,ecmf,t,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


### Combining sel and order_by

In [17]:
# sel() and order_by() can be chained: select two levels, then sort by shortName.
a = ds.sel(level=[500, 850]).order_by(["shortName"])
a.ls()

Unnamed: 0,centre,shortName,typeOfLevel,level,dataDate,dataTime,stepRange,dataType,number,gridType
0,ecmf,t,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
1,ecmf,t,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll
2,ecmf,u,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
3,ecmf,u,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll
4,ecmf,v,isobaricInhPa,850,20180801,1200,0,an,0,regular_ll
5,ecmf,v,isobaricInhPa,500,20180801,1200,0,an,0,regular_ll


### Using indices

In [18]:
# Returns a dict mapping each metadata key to the list of its values in ds.
ds.indices()

{'class': ['od'],
 'stream': ['oper'],
 'levtype': ['pl'],
 'type': ['an'],
 'expver': ['0001'],
 'date': [20180801],
 'time': [1200],
 'domain': ['g'],
 'number': [0],
 'levelist': [300, 400, 500, 700, 850, 1000],
 'param': ['t', 'u', 'v'],
 'level': [300, 400, 500, 700, 850, 1000],
 'shortName': ['t', 'u', 'v']}

We can use the *squeeze* option to see only the keys having more than one value:

In [19]:
# With squeeze=True the single-valued keys (e.g. "date") are left out.
ds.indices(squeeze=True)

{'levelist': [300, 400, 500, 700, 850, 1000],
 'param': ['t', 'u', 'v'],
 'level': [300, 400, 500, 700, 850, 1000],
 'shortName': ['t', 'u', 'v']}

In [20]:
# index() returns the list of values of a single key.
ds.index("param")

['t', 'u', 'v']

In [21]:
# A single-valued key gives a one-element list.
ds.index("date")

[20180801]

Aliases can be used, e.g. instead of *levelist* we can use *level*:

In [22]:
# "level" gives the same values as "levelist" in the indices() output above.
ds.index("level")

[300, 400, 500, 700, 850, 1000]

Count the number of fields for each available level:

In [23]:
# Report how many fields ds holds on each available level.
for level in ds.index("level"):
    count = len(ds.sel(level=level))
    print(f"level={level} len={count}")

level=300 len=3
level=400 len=3
level=500 len=3
level=700 len=3
level=850 len=3
level=1000 len=3
