In [6]:
import cdsapi
import pandas as pd

def generate_lead_times_hours(start_h: int, end_h: int, freq_h: int) -> list[str]:
    lead_times = []
    for h in range(start_h, end_h+1, freq_h):
        lead_times.append(f"{h:03}")
    return lead_times

start = pd.Timedelta("1w")
end = pd.Timedelta("7w")
freq = pd.Timedelta("12h")

start_h = int(start.total_seconds() / 3600)
end_h = int(end.total_seconds() / 3600)
freq_h = int(freq.total_seconds() / 3600)

# 12-hourly from 7 to 49 days
lead_times = generate_lead_times_hours(start_h, end_h, freq_h) 

dataset = "seasonal-original-single-levels"

# 1 year, 2 months, 1 day, W2-W7
request = {
    "originating_centre": "ecmwf",
    "system": "51",
    "variable": [
        "2m_temperature",
    ],
    "year": ["2022"], 
    "month": [
        "01", "02"
    ],
    "day": ["01"],
    "leadtime_hour": lead_times, 
    "data_format": "netcdf",
}

request

{'originating_centre': 'ecmwf',
 'system': '51',
 'variable': ['2m_temperature'],
 'year': ['2022'],
 'month': ['01', '02'],
 'day': ['01'],
 'leadtime_hour': ['168',
  '180',
  '192',
  '204',
  '216',
  '228',
  '240',
  '252',
  '264',
  '276',
  '288',
  '300',
  '312',
  '324',
  '336',
  '348',
  '360',
  '372',
  '384',
  '396',
  '408',
  '420',
  '432',
  '444',
  '456',
  '468',
  '480',
  '492',
  '504',
  '516',
  '528',
  '540',
  '552',
  '564',
  '576',
  '588',
  '600',
  '612',
  '624',
  '636',
  '648',
  '660',
  '672',
  '684',
  '696',
  '708',
  '720',
  '732',
  '744',
  '756',
  '768',
  '780',
  '792',
  '804',
  '816',
  '828',
  '840',
  '852',
  '864',
  '876',
  '888',
  '900',
  '912',
  '924',
  '936',
  '948',
  '960',
  '972',
  '984',
  '996',
  '1008',
  '1020',
  '1032',
  '1044',
  '1056',
  '1068',
  '1080',
  '1092',
  '1104',
  '1116',
  '1128',
  '1140',
  '1152',
  '1164',
  '1176'],
 'data_format': 'netcdf'}

In [None]:
client = cdsapi.Client()
client.retrieve(dataset, request, target="test.seas5.nc").download()

2024-11-19 11:25:33,519 INFO Request ID is 4b0aabe0-588a-438b-b359-4d4480a5af26
2024-11-19 11:25:33,695 INFO status has been updated to accepted


In [19]:
import xarray as xr 
import os 
import glob

root = "/scratch-shared/ewalt/era5-cds-downloader/"

# get all files 'seas5_cds_10m_u_component_of_wind_*.nc'
files = glob.glob(os.path.join(root, "seas5_cds_10m_u_component_of_wind-*.nc"))
files

['/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2022_03-2022_04-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2022_07-2022_08-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2021_01-2021_02-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2021_07-2021_08-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2022_05-2022_06-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2021_05-2021_06-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2023_01-2023_02-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2021_03-2021_04-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_comp

In [20]:
valid_files = []
for f in files:
    try:
        ds = xr.open_dataset(f)
        if "latitude" in ds.coords:
            valid_files.append(f)
    except:
        pass
    
valid_files

['/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2021_01-2021_02-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2023_01-2023_02-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2022_01-2022_02-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2024_01-2024_02-6h-global-360x181.nc']

In [21]:
ds = xr.open_mfdataset(valid_files, combine="by_coords", chunks={})

In [4]:
ds

Unnamed: 0,Array,Chunk
Bytes,10.56 kiB,2.64 kiB
Shape,"(8, 169)","(2, 169)"
Dask graph,4 chunks in 9 graph layers,4 chunks in 9 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 10.56 kiB 2.64 kiB Shape (8, 169) (2, 169) Dask graph 4 chunks in 9 graph layers Data type datetime64[ns] numpy.ndarray",169  8,

Unnamed: 0,Array,Chunk
Bytes,10.56 kiB,2.64 kiB
Shape,"(8, 169)","(2, 169)"
Dask graph,4 chunks in 9 graph layers,4 chunks in 9 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.74 GiB,8.83 MiB
Shape,"(51, 8, 169, 181, 360)","(13, 1, 43, 46, 90)"
Dask graph,2048 chunks in 9 graph layers,2048 chunks in 9 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 16.74 GiB 8.83 MiB Shape (51, 8, 169, 181, 360) (13, 1, 43, 46, 90) Dask graph 2048 chunks in 9 graph layers Data type float32 numpy.ndarray",8  51  360  181  169,

Unnamed: 0,Array,Chunk
Bytes,16.74 GiB,8.83 MiB
Shape,"(51, 8, 169, 181, 360)","(13, 1, 43, 46, 90)"
Dask graph,2048 chunks in 9 graph layers,2048 chunks in 9 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [None]:
#ds.to_netcdf("test.mfdataset.seas5.nc")
ds.to_zarr("test.mfdataset.seas5.zarr", mode="w")

In [None]:
!source ../env/modules.sh && cdo mergetime valid_files[0] valid_files[1] test.mergetime.seas5.nc

/bin/bash: line 1: cdo: command not found


In [5]:
ds = xr.open_dataset("test.mfdataset.seas5.zarr", engine="zarr", chunks={})
ds

Unnamed: 0,Array,Chunk
Bytes,10.56 kiB,2.64 kiB
Shape,"(8, 169)","(2, 169)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 10.56 kiB 2.64 kiB Shape (8, 169) (2, 169) Dask graph 4 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",169  8,

Unnamed: 0,Array,Chunk
Bytes,10.56 kiB,2.64 kiB
Shape,"(8, 169)","(2, 169)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.74 GiB,8.83 MiB
Shape,"(51, 8, 169, 181, 360)","(13, 1, 43, 46, 90)"
Dask graph,2048 chunks in 2 graph layers,2048 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 16.74 GiB 8.83 MiB Shape (51, 8, 169, 181, 360) (13, 1, 43, 46, 90) Dask graph 2048 chunks in 2 graph layers Data type float32 numpy.ndarray",8  51  360  181  169,

Unnamed: 0,Array,Chunk
Bytes,16.74 GiB,8.83 MiB
Shape,"(51, 8, 169, 181, 360)","(13, 1, 43, 46, 90)"
Dask graph,2048 chunks in 2 graph layers,2048 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [26]:
"forecast_reference_time" in ds

True

In [50]:
str(list(ds.coords.keys()))+","+str(list(ds.data_vars.keys()))

"['number', 'forecast_reference_time', 'forecast_period', 'latitude', 'longitude', 'valid_time'],['u10']"

In [37]:
valid_files

['/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2021_01-2021_02-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2023_01-2023_02-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2022_01-2022_02-6h-global-360x181.nc',
 '/scratch-shared/ewalt/era5-cds-downloader/seas5_cds_10m_u_component_of_wind-2024_01-2024_02-6h-global-360x181.nc']

In [46]:
i = 7
ds.forecast_reference_time.values[i], ds.valid_time.values[i,:10]

(numpy.datetime64('2024-02-01T00:00:00.000000000'),
 array(['2024-02-08T00:00:00.000000000', '2024-02-08T06:00:00.000000000',
        '2024-02-08T12:00:00.000000000', '2024-02-08T18:00:00.000000000',
        '2024-02-09T00:00:00.000000000', '2024-02-09T06:00:00.000000000',
        '2024-02-09T12:00:00.000000000', '2024-02-09T18:00:00.000000000',
        '2024-02-10T00:00:00.000000000', '2024-02-10T06:00:00.000000000'],
       dtype='datetime64[ns]'))

In [51]:
print(str(list(ds.coords.keys()))+","+str(list(ds.data_vars.keys())))

['number', 'forecast_reference_time', 'forecast_period', 'latitude', 'longitude', 'valid_time'],['u10']


In [52]:
ds.number

In [57]:
3+11+5*11

69