In [11]:
import logging
import os.path
import argparse
from typing import Any, Union

# External modules
import xarray as xr
import numpy as np
from tqdm import tqdm

# Internal modules

def get_regridder(
        grid_in: xr.Dataset, out_res: float, method='bilinear'
) -> Any:
    import xesmf as xe
    grid_out = xr.Dataset(
        {
            'lat': (['lat'], np.arange(-90+out_res/2, 90, out_res)),
            'lon': (['lon'], np.arange(0, 360, out_res)),
        }
    )
    regridder = xe.Regridder(grid_in, grid_out, method=method,
                             periodic=True, reuse_weights=False)
    return regridder


def _regrid_loop(
        regridder: Any,
        ds: xr.Dataset,
        chunk_size: Union[int, None] = None
) -> xr.Dataset:
    if chunk_size is None:
        chunk_size = len(ds['time'])
    ds_regridded = []
    regrid_pbar = tqdm(range(0, len(ds['time']), chunk_size))
    for chunk in regrid_pbar:
        tmp_regridded = regridder(
            ds.isel(time=slice(chunk, chunk+chunk_size))
        )
        ds_regridded.append(tmp_regridded)
    ds_regridded = xr.concat(ds_regridded, dim='time')
    for var in ds_regridded:
        ds_regridded[var].attrs = ds[var].attrs
    ds_regridded.attrs = ds.attrs
    return ds_regridded


def regrid_ds(
        ds_ens: Union[xr.Dataset, None],
        chunk_size: Union[int, None] = None
) -> Union[xr.Dataset, None]:
    if ds_ens is not None:
        ds_ens = ds_ens.rename({'latitude': 'lat', 'longitude': 'lon'})
        grid_in = ds_ens[['lat', 'lon']]
        regridder = get_regridder(grid_in, out_res=5.625)
        ds_regridded = _regrid_loop(regridder, ds_ens, chunk_size)
    else:
        ds_regridded = None
    return ds_regridded

In [12]:
import xarray as xr
import distributed


In [13]:
cluster = distributed.LocalCluster(n_workers=4, threads_per_worker=1, local_directory='/tmp')
client = distributed.Client(cluster)
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 57290 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:57290/status,

0,1
Dashboard: http://127.0.0.1:57290/status,Workers: 4
Total threads: 4,Total memory: 15.34 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:57291,Workers: 4
Dashboard: http://127.0.0.1:57290/status,Total threads: 4
Started: Just now,Total memory: 15.34 GiB

0,1
Comm: tcp://127.0.0.1:57315,Total threads: 1
Dashboard: http://127.0.0.1:57318/status,Memory: 3.84 GiB
Nanny: tcp://127.0.0.1:57294,
Local directory: d:\tmp\dask-scratch-space\worker-r8hp0qsl,Local directory: d:\tmp\dask-scratch-space\worker-r8hp0qsl

0,1
Comm: tcp://127.0.0.1:57312,Total threads: 1
Dashboard: http://127.0.0.1:57313/status,Memory: 3.84 GiB
Nanny: tcp://127.0.0.1:57295,
Local directory: d:\tmp\dask-scratch-space\worker-pcpvy06_,Local directory: d:\tmp\dask-scratch-space\worker-pcpvy06_

0,1
Comm: tcp://127.0.0.1:57316,Total threads: 1
Dashboard: http://127.0.0.1:57317/status,Memory: 3.84 GiB
Nanny: tcp://127.0.0.1:57296,
Local directory: d:\tmp\dask-scratch-space\worker-_cdr7n84,Local directory: d:\tmp\dask-scratch-space\worker-_cdr7n84

0,1
Comm: tcp://127.0.0.1:57321,Total threads: 1
Dashboard: http://127.0.0.1:57322/status,Memory: 3.84 GiB
Nanny: tcp://127.0.0.1:57297,
Local directory: d:\tmp\dask-scratch-space\worker-ybhvm5ho,Local directory: d:\tmp\dask-scratch-space\worker-ybhvm5ho


# Load IFS data

In [14]:
ds_ifs_pl = xr.open_dataset(r'D:\Projects\NIOT\v1\ensemble_transformer\scripts\data\delhi\pl.nc',chunks={'time': 2})
ds_ifs_sfc = xr.open_dataset(r'D:\Projects\NIOT\v1\ensemble_transformer\scripts\data\delhi\sfc.nc', chunks={'time': 2})

In [15]:
ds_ifs_pl

Unnamed: 0,Array,Chunk
Bytes,16.69 MiB,137.81 kiB
Shape,"(248, 10, 2, 21, 21)","(2, 10, 2, 21, 21)"
Dask graph,124 chunks in 2 graph layers,124 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.69 MiB 137.81 kiB Shape (248, 10, 2, 21, 21) (2, 10, 2, 21, 21) Dask graph 124 chunks in 2 graph layers Data type float64 numpy.ndarray",10  248  21  21  2,

Unnamed: 0,Array,Chunk
Bytes,16.69 MiB,137.81 kiB
Shape,"(248, 10, 2, 21, 21)","(2, 10, 2, 21, 21)"
Dask graph,124 chunks in 2 graph layers,124 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.69 MiB,137.81 kiB
Shape,"(248, 10, 2, 21, 21)","(2, 10, 2, 21, 21)"
Dask graph,124 chunks in 2 graph layers,124 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.69 MiB 137.81 kiB Shape (248, 10, 2, 21, 21) (2, 10, 2, 21, 21) Dask graph 124 chunks in 2 graph layers Data type float64 numpy.ndarray",10  248  21  21  2,

Unnamed: 0,Array,Chunk
Bytes,16.69 MiB,137.81 kiB
Shape,"(248, 10, 2, 21, 21)","(2, 10, 2, 21, 21)"
Dask graph,124 chunks in 2 graph layers,124 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [16]:
ds_ifs_gh = ds_ifs_pl['gh'].to_dataset("level")
ds_ifs_gh = ds_ifs_gh.rename({500: 'gh_500', 850: 'gh_850'})

In [17]:
ds_ifs_pl

Unnamed: 0,Array,Chunk
Bytes,16.69 MiB,137.81 kiB
Shape,"(248, 10, 2, 21, 21)","(2, 10, 2, 21, 21)"
Dask graph,124 chunks in 2 graph layers,124 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.69 MiB 137.81 kiB Shape (248, 10, 2, 21, 21) (2, 10, 2, 21, 21) Dask graph 124 chunks in 2 graph layers Data type float64 numpy.ndarray",10  248  21  21  2,

Unnamed: 0,Array,Chunk
Bytes,16.69 MiB,137.81 kiB
Shape,"(248, 10, 2, 21, 21)","(2, 10, 2, 21, 21)"
Dask graph,124 chunks in 2 graph layers,124 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.69 MiB,137.81 kiB
Shape,"(248, 10, 2, 21, 21)","(2, 10, 2, 21, 21)"
Dask graph,124 chunks in 2 graph layers,124 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.69 MiB 137.81 kiB Shape (248, 10, 2, 21, 21) (2, 10, 2, 21, 21) Dask graph 124 chunks in 2 graph layers Data type float64 numpy.ndarray",10  248  21  21  2,

Unnamed: 0,Array,Chunk
Bytes,16.69 MiB,137.81 kiB
Shape,"(248, 10, 2, 21, 21)","(2, 10, 2, 21, 21)"
Dask graph,124 chunks in 2 graph layers,124 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [18]:
ds_ifs_t = ds_ifs_pl['t'].to_dataset("level")
ds_ifs_t = ds_ifs_t.rename({500: 't_500', 850: 't_850'})

In [19]:
ds_ifs_t['longitude']

In [20]:
ds_ifs_merged = xr.merge([ds_ifs_sfc, ds_ifs_t, ds_ifs_gh])

In [21]:
ds_ifs_merged = ds_ifs_merged.isel(latitude=slice(None, None, -1))
ds_ifs_merged = ds_ifs_merged.rename({'number': 'ensemble'})

In [22]:
ds_ifs_merged.to_netcdf("testout.nc", engine="netcdf4")

PermissionError: [Errno 13] Permission denied: 'd:\\Projects\\NIOT\\v1\\ensemble_transformer\\scripts\\data\\testout.nc'

In [None]:
ds_ifs_merged[['latitude', 'longitude']]

In [34]:
ds_ifs_merged[['time']]

In [32]:
ds_ifs_train = ds_ifs_merged.sel(time=slice('2020-12-01', '2022-12-31'))

In [None]:
ds_ifs_train

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 4 graph layers,91 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 6.12 MiB 68.91 kiB Shape (182, 10, 21, 21) (2, 10, 21, 21) Dask graph 91 chunks in 4 graph layers Data type float64 numpy.ndarray",182  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 4 graph layers,91 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 5 graph layers,91 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 6.12 MiB 68.91 kiB Shape (182, 10, 21, 21) (2, 10, 21, 21) Dask graph 91 chunks in 5 graph layers Data type float64 numpy.ndarray",182  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 5 graph layers,91 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 5 graph layers,91 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 6.12 MiB 68.91 kiB Shape (182, 10, 21, 21) (2, 10, 21, 21) Dask graph 91 chunks in 5 graph layers Data type float64 numpy.ndarray",182  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 5 graph layers,91 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 5 graph layers,91 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 6.12 MiB 68.91 kiB Shape (182, 10, 21, 21) (2, 10, 21, 21) Dask graph 91 chunks in 5 graph layers Data type float64 numpy.ndarray",182  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 5 graph layers,91 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 5 graph layers,91 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 6.12 MiB 68.91 kiB Shape (182, 10, 21, 21) (2, 10, 21, 21) Dask graph 91 chunks in 5 graph layers Data type float64 numpy.ndarray",182  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,6.12 MiB,68.91 kiB
Shape,"(182, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,91 chunks in 5 graph layers,91 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [23]:
ds_ifs_train = ds_ifs_merged.sel(time=slice('2020-12-01', '2022-12-31'))
ds_ifs_train.to_zarr(
    '../data/processed/ifs/ds_train',
    encoding={
        't2m': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
        't_500': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
        't_850': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
        'gh_500': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
        'gh_850': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
    }
)

ContainsGroupError: path '' contains a group

In [43]:
y = ds_ifs_train.sel(time=slice('2021-01-12'))

In [44]:
y

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 5 graph layers,31 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.09 MiB 68.91 kiB Shape (62, 10, 21, 21) (2, 10, 21, 21) Dask graph 31 chunks in 5 graph layers Data type float64 numpy.ndarray",62  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 5 graph layers,31 chunks in 5 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 6 graph layers,31 chunks in 6 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.09 MiB 68.91 kiB Shape (62, 10, 21, 21) (2, 10, 21, 21) Dask graph 31 chunks in 6 graph layers Data type float64 numpy.ndarray",62  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 6 graph layers,31 chunks in 6 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 6 graph layers,31 chunks in 6 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.09 MiB 68.91 kiB Shape (62, 10, 21, 21) (2, 10, 21, 21) Dask graph 31 chunks in 6 graph layers Data type float64 numpy.ndarray",62  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 6 graph layers,31 chunks in 6 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 6 graph layers,31 chunks in 6 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.09 MiB 68.91 kiB Shape (62, 10, 21, 21) (2, 10, 21, 21) Dask graph 31 chunks in 6 graph layers Data type float64 numpy.ndarray",62  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 6 graph layers,31 chunks in 6 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 6 graph layers,31 chunks in 6 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.09 MiB 68.91 kiB Shape (62, 10, 21, 21) (2, 10, 21, 21) Dask graph 31 chunks in 6 graph layers Data type float64 numpy.ndarray",62  1  21  21  10,

Unnamed: 0,Array,Chunk
Bytes,2.09 MiB,68.91 kiB
Shape,"(62, 10, 21, 21)","(2, 10, 21, 21)"
Dask graph,31 chunks in 6 graph layers,31 chunks in 6 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [37]:
ds_ifs_test = ds_ifs_merged.sel(time=slice('2023-12-01', '2023-12-31'))
ds_ifs_test.to_zarr(
    '../data/processed/ifs/ds_test',
    encoding={
        't2m': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
        't_500': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
        't_850': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
        'gh_500': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
        'gh_850': {'dtype': 'float32', 'scale_factor': 1.0, 'add_offset': 0.0},
    }
)

ContainsGroupError: path '' contains a group

In [38]:
ds_ifs_test[['time']]

In [4]:
import xarray as xr
import numpy as np

In [5]:
from typing import Union


In [6]:
z = xr.open_dataset('testout.nc')[['t2m']]

In [9]:
y = xr.open_dataset('out.nc')[['t2m']]

In [11]:
z

In [17]:
def load_merge_data(era5_dir: str) -> xr.Dataset:
    ds_t2m = xr.open_dataset(
        os.path.join(era5_dir, 't2m_raw.nc')
    ).sel() .chunk({'time': 10})
    
    ds_t850 = xr.open_dataset(
        os.path.join(era5_dir, 't850_raw.nc')
    ).sel() .chunk({'time': 10})
    ds_z500 = xr.open_dataset(
        os.path.join(era5_dir, 'z500_raw.nc')
    ).sel() .chunk({'time': 10})
    print('Got data')
    merged_ds = xr.merge([ds_z500, ds_t850, ds_t2m])
    print('Merged datasets')
    merged_ds = merged_ds.rename({'latitude': 'lat', 'longitude': 'lon'})
    print(merged_ds)
    print('Cleaned data')
    return merged_ds

In [26]:
y = load_merge_data(r"D:\Projects\NIOT\v1\ensemble_transformer\scripts\data\raw")

Got data
Merged datasets
<xarray.Dataset> Size: 657kB
Dimensions:  (lon: 21, lat: 21, time: 62)
Coordinates:
  * lon      (lon) float32 84B 78.0 78.25 78.5 78.75 ... 82.25 82.5 82.75 83.0
  * lat      (lat) float32 84B 15.0 14.75 14.5 14.25 ... 10.75 10.5 10.25 10.0
  * time     (time) datetime64[ns] 496B 2023-12-01 ... 2023-12-31T12:00:00
Data variables:
    z        (time, lat, lon) float64 219kB dask.array<chunksize=(10, 21, 21), meta=np.ndarray>
    t        (time, lat, lon) float64 219kB dask.array<chunksize=(10, 21, 21), meta=np.ndarray>
    t2m      (time, lat, lon) float64 219kB dask.array<chunksize=(10, 21, 21), meta=np.ndarray>
Attributes:
    Conventions:  CF-1.6
    history:      2024-06-12 07:50:10 GMT by grib_to_netcdf-2.28.1: /opt/ecmw...
Cleaned data


In [27]:
y

Unnamed: 0,Array,Chunk
Bytes,213.61 kiB,34.45 kiB
Shape,"(62, 21, 21)","(10, 21, 21)"
Dask graph,7 chunks in 2 graph layers,7 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 213.61 kiB 34.45 kiB Shape (62, 21, 21) (10, 21, 21) Dask graph 7 chunks in 2 graph layers Data type float64 numpy.ndarray",21  21  62,

Unnamed: 0,Array,Chunk
Bytes,213.61 kiB,34.45 kiB
Shape,"(62, 21, 21)","(10, 21, 21)"
Dask graph,7 chunks in 2 graph layers,7 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,213.61 kiB,34.45 kiB
Shape,"(62, 21, 21)","(10, 21, 21)"
Dask graph,7 chunks in 2 graph layers,7 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 213.61 kiB 34.45 kiB Shape (62, 21, 21) (10, 21, 21) Dask graph 7 chunks in 2 graph layers Data type float64 numpy.ndarray",21  21  62,

Unnamed: 0,Array,Chunk
Bytes,213.61 kiB,34.45 kiB
Shape,"(62, 21, 21)","(10, 21, 21)"
Dask graph,7 chunks in 2 graph layers,7 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,213.61 kiB,34.45 kiB
Shape,"(62, 21, 21)","(10, 21, 21)"
Dask graph,7 chunks in 2 graph layers,7 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 213.61 kiB 34.45 kiB Shape (62, 21, 21) (10, 21, 21) Dask graph 7 chunks in 2 graph layers Data type float64 numpy.ndarray",21  21  62,

Unnamed: 0,Array,Chunk
Bytes,213.61 kiB,34.45 kiB
Shape,"(62, 21, 21)","(10, 21, 21)"
Dask graph,7 chunks in 2 graph layers,7 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [29]:
ds_ifs_merged = 

In [28]:
ds_ifs_merged = ds_ifs_merged.rename({'latitude': 'lat', 'longitude': 'lon'})

In [29]:
def get_regridder(grid_in: xr.Dataset, out_res: float, method='bilinear'):
    import xesmf as xe
    grid_out = xr.Dataset(
        {
            'lat': (['lat'], np.arange(-90+out_res/2, 90, out_res)),
            'lon': (['lon'], np.arange(0, 360, out_res)),
        }
    )
    regridder = xe.Regridder(grid_in, grid_out, method=method,
                             periodic=True, reuse_weights=False, ignore_degenerate=False)
    return regridder


def _regrid_loop(
        regridder,
        ds: xr.Dataset,
        chunk_size: Union[int, None] = None
) -> xr.Dataset:
    if chunk_size is None:
        chunk_size = len(ds['time'])
    ds_regridded = []
    regrid_pbar = range(0, len(ds['time']), chunk_size)
    for chunk in regrid_pbar:
        tmp_regridded = regridder(
            ds.isel(time=slice(chunk, chunk+chunk_size))
        )
        ds_regridded.append(tmp_regridded)
    ds_regridded = xr.concat(ds_regridded, dim='time')
    for var in ds_regridded:
        ds_regridded[var].attrs = ds[var].attrs
    ds_regridded.attrs = ds.attrs
    return ds_regridded


def regrid_ds(
        ds_ens: Union[xr.Dataset, None],
        chunk_size: Union[int, None] = None
) -> Union[xr.Dataset, None]:
    if ds_ens is not None:
        grid_in = ds_ens[['lat', 'lon']]
        print(grid_in)
        regridder = get_regridder(grid_in, out_res=5.625)
        ds_regridded = _regrid_loop(regridder, ds_ens, chunk_size)
    else:
        ds_regridded = None
    return ds_regridded

In [30]:
y = regrid_ds(ds_ifs_merged, chunk_size=100)

<xarray.Dataset> Size: 168B
Dimensions:  (lat: 21, lon: 21)
Coordinates:
  * lat      (lat) float32 84B 10.0 10.25 10.5 10.75 ... 14.25 14.5 14.75 15.0
  * lon      (lon) float32 84B 78.0 78.25 78.5 78.75 ... 82.25 82.5 82.75 83.0
Data variables:
    *empty*
Attributes:
    Conventions:  CF-1.6
    history:      2024-06-18 15:14:25 GMT by grib_to_netcdf-2.33.2: /opt/ecmw...


In [31]:
ds_ifs_merged = y

In [32]:
ds_ifs_merged

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 8 graph layers,31 chunks in 8 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 8 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 8 graph layers,31 chunks in 8 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 9 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 9 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 9 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 9 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [33]:
ds_ifs_merged = ds_ifs_merged.rename({'lat': 'latitude', 'lon': 'longitude'})

In [34]:
ds_ifs_merged

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 8 graph layers,31 chunks in 8 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 8 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 8 graph layers,31 chunks in 8 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 9 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 9 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 9 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 9.69 MiB 320.00 kiB Shape (62, 10, 32, 64) (2, 10, 32, 64) Dask graph 31 chunks in 9 graph layers Data type float64 numpy.ndarray",62  1  64  32  10,

Unnamed: 0,Array,Chunk
Bytes,9.69 MiB,320.00 kiB
Shape,"(62, 10, 32, 64)","(2, 10, 32, 64)"
Dask graph,31 chunks in 9 graph layers,31 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
