In [1]:
# Uncomment the two lines below to install or upgrade the required packages.
# %pip (rather than !pip) installs into the environment of the running kernel.
# %pip install -U climetlab --quiet
# %pip install -U climetlab_s2s_ai_challenge --quiet

In [2]:
import climetlab as cml 

In [3]:
import climetlab_s2s_ai_challenge
# Report the installed climetlab and plugin versions, one per line.
print(
    f'Climetlab version : {cml.__version__}',
    f'Climetlab-s2s-ai-challenge plugin version : {climetlab_s2s_ai_challenge.__version__}',
    sep='\n',
)

Climetlab version : 0.6.1
Climetlab-s2s-ai-challenge plugin version : 0.4.18


In [4]:
import os
# When running in continuous integration on GitHub (the GITHUB_ACTIONS
# environment variable holds a non-empty value), append "-dev" to the
# dataset names so that only a fragment of the data is downloaded.
# Warning: do not use the "-dev" datasets for training ML models.
is_test = '-dev' if os.environ.get('GITHUB_ACTIONS') else ''

# Using grib data

In [5]:
# Data format passed as format=... to the cml.load_dataset calls below.
FORMAT = 'grib'

Let us download the GRIB data for total precipitation (tp) for one given date from the training-input dataset:

In [6]:
# Request total precipitation ('tp') from origin 'ecmwf' for a single date,
# in the format selected by FORMAT.
tp_request = {
    'origin': 'ecmwf',
    'date': 20200102,
    'parameter': 'tp',
    'format': FORMAT,
}
# is_test is '' or '-dev'; the suffix selects the reduced "-dev" dataset.
cmlds = cml.load_dataset("s2s-ai-challenge-training-input" + is_test, **tp_request)

By downloading data from this dataset, you agree to the terms and conditions defined at https://apps.ecmwf.int/datasets/data/s2s/licence/. If you do not agree with such terms, do not download the data. 


                                                                                   

We can iterate over the GRIB fields of the dataset:

In [7]:
# Inspect the first two fields only. islice stops iterating after two items,
# whereas list(cmlds)[0:2] first builds a list of every field in the dataset
# just to discard all but two of them.
from itertools import islice

for field in islice(cmlds, 2):
    # Field summary, then its valid date/time and grid shape, then the values.
    print(field)
    print(field.valid_datetime(), field.shape)
    print(field.to_numpy())

GribField(tp,None,20200102,0,0,0)
2020-01-02 00:00:00 (121, 240)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
GribField(tp,None,20200102,0,24,0)
2020-01-03 00:00:00 (121, 240)
[[0.3046875  0.3046875  0.3046875  ... 0.3046875  0.3046875  0.3046875 ]
 [0.28125    0.28125    0.28515625 ... 0.28515625 0.28515625 0.28125   ]
 [0.58203125 0.5703125  0.5546875  ... 0.64453125 0.62890625 0.60546875]
 ...
 [0.         0.         0.         ... 0.0078125  0.0078125  0.00390625]
 [0.0078125  0.0078125  0.0078125  ... 0.01171875 0.01171875 0.0078125 ]
 [0.0859375  0.0859375  0.0859375  ... 0.0859375  0.0859375  0.0859375 ]]


This climetlab dataset can be used as an xarray.Dataset or as a pandas.DataFrame:

In [8]:
# Convert the dataset to xarray; as the last expression of the cell, the result is displayed.
cmlds.to_xarray()

Unnamed: 0,Array,Chunk
Bytes,7.52 kB,8 B
Shape,"(20, 47)","(1, 1)"
Count,941 Tasks,940 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 7.52 kB 8 B Shape (20, 47) (1, 1) Count 941 Tasks 940 Chunks Type datetime64[ns] numpy.ndarray",47  20,

Unnamed: 0,Array,Chunk
Bytes,7.52 kB,8 B
Shape,"(20, 47)","(1, 1)"
Count,941 Tasks,940 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.20 GB,116.16 kB
Shape,"(11, 20, 47, 121, 240)","(1, 1, 1, 121, 240)"
Count,20681 Tasks,10340 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.20 GB 116.16 kB Shape (11, 20, 47, 121, 240) (1, 1, 1, 121, 240) Count 20681 Tasks 10340 Chunks Type float32 numpy.ndarray",20  11  240  121  47,

Unnamed: 0,Array,Chunk
Bytes,1.20 GB,116.16 kB
Shape,"(11, 20, 47, 121, 240)","(1, 1, 1, 121, 240)"
Count,20681 Tasks,10340 Chunks
Type,float32,numpy.ndarray


We can get the temperature parameter (2t) in a similar fashion. The "date" and "parameter" arguments also accept lists of values.

In [9]:
# Same kind of request as for 'tp', this time for the temperature parameter ('2t').
t2m_training_input = cml.load_dataset(
    "s2s-ai-challenge-training-input" + is_test,
    origin='ecmwf',
    date=20200102,
    parameter='2t',
    format=FORMAT,
)
# Last expression of the cell: the xarray view is displayed.
t2m_training_input.to_xarray()

                                                                                    

Unnamed: 0,Array,Chunk
Bytes,7.36 kB,8 B
Shape,"(20, 46)","(1, 1)"
Count,921 Tasks,920 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 7.36 kB 8 B Shape (20, 46) (1, 1) Count 921 Tasks 920 Chunks Type datetime64[ns] numpy.ndarray",46  20,

Unnamed: 0,Array,Chunk
Bytes,7.36 kB,8 B
Shape,"(20, 46)","(1, 1)"
Count,921 Tasks,920 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.18 GB,116.16 kB
Shape,"(11, 20, 46, 121, 240)","(1, 1, 1, 121, 240)"
Count,20241 Tasks,10120 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.18 GB 116.16 kB Shape (11, 20, 46, 121, 240) (1, 1, 1, 121, 240) Count 20241 Tasks 10120 Chunks Type float32 numpy.ndarray",20  11  240  121  46,

Unnamed: 0,Array,Chunk
Bytes,1.18 GB,116.16 kB
Shape,"(11, 20, 46, 121, 240)","(1, 1, 1, 121, 240)"
Count,20241 Tasks,10120 Chunks
Type,float32,numpy.ndarray


Data from the forecast-input dataset can be retrieved in a similar fashion:

In [10]:
# Two dates requested at once: the "date" argument also accepts a list.
forecast_dates = ["20200102", "20200109"]
forecast_input = cml.load_dataset(
    "s2s-ai-challenge-forecast-input" + is_test,
    origin='ecmwf',
    date=forecast_dates,
    parameter='2t',
    format=FORMAT,
)
# Last expression of the cell: the xarray view is displayed.
forecast_input.to_xarray()

By downloading data from this dataset, you agree to the terms and conditions defined at https://apps.ecmwf.int/datasets/data/s2s/licence/. If you do not agree with such terms, do not download the data. 


                                                                                     

Unnamed: 0,Array,Chunk
Bytes,736 B,8 B
Shape,"(2, 46)","(1, 1)"
Count,186 Tasks,92 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 736 B 8 B Shape (2, 46) (1, 1) Count 186 Tasks 92 Chunks Type datetime64[ns] numpy.ndarray",46  2,

Unnamed: 0,Array,Chunk
Bytes,736 B,8 B
Shape,"(2, 46)","(1, 1)"
Count,186 Tasks,92 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,545.02 MB,116.16 kB
Shape,"(51, 2, 46, 121, 240)","(1, 1, 1, 121, 240)"
Count,14078 Tasks,4692 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 545.02 MB 116.16 kB Shape (51, 2, 46, 121, 240) (1, 1, 1, 121, 240) Count 14078 Tasks 4692 Chunks Type float32 numpy.ndarray",2  51  240  121  46,

Unnamed: 0,Array,Chunk
Bytes,545.02 MB,116.16 kB
Shape,"(51, 2, 46, 121, 240)","(1, 1, 1, 121, 240)"
Count,14078 Tasks,4692 Chunks
Type,float32,numpy.ndarray
