In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import glob
from tqdm import tqdm

### Check train files

In [1]:
mode = 'train'
filenames = glob.glob(f'../data/{mode}_2001/ERA5_tp/*.nc')
# The six characters immediately before the '.nc' suffix are used as the sample id.
ids = [filename[-9:-3] for filename in filenames]

# ERA5 variables whose coordinates are compared; 'tp' is the reference.
era5_vars = ('rh', 't', 'tcw', 'tp')

for idx in tqdm(ids, total=len(ids)):
    # Pull the coordinate values out as plain NumPy arrays while each file is
    # still open, so nothing has to be read from an already-closed dataset.
    # Only coordinates are needed here, so the tp data is neither scaled nor loaded.
    lons, times = {}, {}
    for var in era5_vars:
        path = f'../data/{mode}_2001/ERA5_{var}/ERA5_{var}_cropped_{idx}.nc'
        with xr.open_dataset(path) as ds:
            lons[var] = ds.longitude.values
            times[var] = ds.time.values
    # MSWEP is opened only to confirm that the file exists and is readable.
    with xr.open_dataset(f'../data/{mode}_2001/MSWEP_tp/MSWEP_tp_cropped_{idx}.nc') as mswep_ds:
        pass

    # Compare raw arrays rather than DataArrays: `==` between DataArrays first
    # aligns them on their shared indexes (inner join), so two different index
    # coordinates would only be compared on their intersection and
    # `(a == b).all()` could never fail. np.array_equal also handles
    # shape mismatches and multi-element time coordinates.
    for var in ('rh', 't', 'tcw'):
        assert np.array_equal(lons['tp'], lons[var]), 'Lons do not match!'

    for var in ('rh', 't', 'tcw'):
        assert np.array_equal(times['tp'], times[var]), 'Time does not match!'
    # NOTE(review): latitude is not compared and MSWEP coordinates are not
    # checked against ERA5 — confirm whether that is intended.

100%|██████████| 80871/80871 [1:01:20<00:00, 21.97it/s]


### Check test files

In [2]:
mode = 'test'
filenames = glob.glob(f'../data/{mode}_2001/ERA5_tp/*.nc')
# The six characters immediately before the '.nc' suffix are used as the sample id.
ids = [filename[-9:-3] for filename in filenames]

# ERA5 variables whose coordinates are compared; 'tp' is the reference.
era5_vars = ('rh', 't', 'tcw', 'tp')

for idx in tqdm(ids, total=len(ids)):
    # Pull the coordinate values out as plain NumPy arrays while each file is
    # still open, so nothing has to be read from an already-closed dataset.
    # Only coordinates are needed here, so the tp data is neither scaled nor loaded.
    lons, times = {}, {}
    for var in era5_vars:
        path = f'../data/{mode}_2001/ERA5_{var}/ERA5_{var}_cropped_{idx}.nc'
        with xr.open_dataset(path) as ds:
            lons[var] = ds.longitude.values
            times[var] = ds.time.values
    # MSWEP is opened only to confirm that the file exists and is readable.
    with xr.open_dataset(f'../data/{mode}_2001/MSWEP_tp/MSWEP_tp_cropped_{idx}.nc') as mswep_ds:
        pass

    # Compare raw arrays rather than DataArrays: `==` between DataArrays first
    # aligns them on their shared indexes (inner join), so two different index
    # coordinates would only be compared on their intersection and
    # `(a == b).all()` could never fail. np.array_equal also handles
    # shape mismatches and multi-element time coordinates.
    for var in ('rh', 't', 'tcw'):
        assert np.array_equal(lons['tp'], lons[var]), 'Lons do not match!'

    for var in ('rh', 't', 'tcw'):
        assert np.array_equal(times['tp'], times[var]), 'Time does not match!'
    # NOTE(review): latitude is not compared and MSWEP coordinates are not
    # checked against ERA5 — confirm whether that is intended.

100%|██████████| 23321/23321 [18:05<00:00, 21.48it/s]


### Merge train files

In [3]:
mode = 'train'
filenames = glob.glob(f'../data/{mode}_2001/ERA5_tp/*.nc')
# The six characters immediately before the '.nc' suffix are used as the sample id.
ids = [filename[-9:-3] for filename in filenames]

for idx in tqdm(ids, total=len(ids)):
    # Keep every input open until the merged file has been written. Merging
    # datasets after their `with` block has closed them only works if the
    # backend quietly reopens the files, and those handles are then unmanaged.
    with xr.open_dataset(f'../data/{mode}_2001/ERA5_rh/ERA5_rh_cropped_{idx}.nc') as rh_ds, \
         xr.open_dataset(f'../data/{mode}_2001/ERA5_t/ERA5_t_cropped_{idx}.nc') as t_ds, \
         xr.open_dataset(f'../data/{mode}_2001/ERA5_tcw/ERA5_tcw_cropped_{idx}.nc') as tcw_ds, \
         xr.open_dataset(f'../data/{mode}_2001/ERA5_tp/ERA5_tp_cropped_{idx}.nc') as tp_raw_ds, \
         xr.open_dataset(f'../data/{mode}_2001/MSWEP_tp/MSWEP_tp_cropped_{idx}.nc') as mswep_ds:
        # Scale here instead of in the `with` header so the context manager
        # owns the file-backed dataset, not the arithmetic result.
        # NOTE(review): the factor 1000 is presumably a unit conversion — confirm.
        tp_ds = tp_raw_ds * 1000
        merged_ds = xr.merge([rh_ds, t_ds, tcw_ds, tp_ds, mswep_ds])
        # Remove the 'lon' and 'lat' variables from the merged dataset before writing.
        merged_ds = merged_ds.drop_vars(['lon', 'lat'])
        merged_ds.to_netcdf(f'../data/{mode}_2001/merged/merged_{idx}.nc')

100%|██████████| 80871/80871 [1:15:50<00:00, 17.77it/s]


### Merge test files

In [2]:
mode = 'test'
filenames = glob.glob(f'../data/{mode}_2001/ERA5_tp/*.nc')
# The six characters immediately before the '.nc' suffix are used as the sample id.
ids = [filename[-9:-3] for filename in filenames]

for idx in tqdm(ids, total=len(ids)):
    # Keep every input open until the merged file has been written. Merging
    # datasets after their `with` block has closed them only works if the
    # backend quietly reopens the files, and those handles are then unmanaged.
    with xr.open_dataset(f'../data/{mode}_2001/ERA5_rh/ERA5_rh_cropped_{idx}.nc') as rh_ds, \
         xr.open_dataset(f'../data/{mode}_2001/ERA5_t/ERA5_t_cropped_{idx}.nc') as t_ds, \
         xr.open_dataset(f'../data/{mode}_2001/ERA5_tcw/ERA5_tcw_cropped_{idx}.nc') as tcw_ds, \
         xr.open_dataset(f'../data/{mode}_2001/ERA5_tp/ERA5_tp_cropped_{idx}.nc') as tp_raw_ds, \
         xr.open_dataset(f'../data/{mode}_2001/MSWEP_tp/MSWEP_tp_cropped_{idx}.nc') as mswep_ds:
        # Scale here instead of in the `with` header so the context manager
        # owns the file-backed dataset, not the arithmetic result.
        # NOTE(review): the factor 1000 is presumably a unit conversion — confirm.
        tp_ds = tp_raw_ds * 1000
        merged_ds = xr.merge([rh_ds, t_ds, tcw_ds, tp_ds, mswep_ds])
        # Remove the 'lon' and 'lat' variables from the merged dataset before writing.
        merged_ds = merged_ds.drop_vars(['lon', 'lat'])
        merged_ds.to_netcdf(f'../data/{mode}_2001/merged/merged_{idx}.nc')

100%|██████████| 23321/23321 [24:34<00:00, 15.82it/s]
