In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import glob
from tqdm import tqdm
import h5py

### Merge train files

In [2]:
mode = 'train'
filenames = glob.glob(f'../data/{mode}_2001/ERA5_tp/*.nc')
ids = [filename[-9:-3] for filename in filenames]

for idx in tqdm(ids, total=len(ids)):
#     with xr.open_dataset(f'../data/{mode}_2001/ERA5_rh/ERA5_rh_cropped_{idx}.nc') as rh_ds:
#         pass
#     with xr.open_dataset(f'../data/{mode}_2001/ERA5_t/ERA5_t_cropped_{idx}.nc') as t_ds:
#         pass
#     with xr.open_dataset(f'../data/{mode}_2001/ERA5_tcw/ERA5_tcw_cropped_{idx}.nc') as tcw_ds:
#         pass
    with xr.open_dataset(f'../data/{mode}_2001/ERA5_tp/ERA5_tp_cropped_{idx}.nc')['tp'] as era5_ds:
        era5_tp = np.array(era5_ds) * 1000
        era5_tp = np.pad(era5_tp, (2,3), constant_values=0)
        era5_tp = np.expand_dims(era5_tp, axis=-1)
    with xr.open_dataset(f'../data/{mode}_2001/MSWEP_tp/MSWEP_tp_cropped_{idx}.nc')['precipitation'] as mswep_ds:
        mswep_tp = np.array(mswep_ds)
        mswep_tp = np.pad(mswep_tp, (2, 3), constant_values=0)
        mswep_tp = np.expand_dims(mswep_tp, axis=-1)
    with h5py.File(f'../data/{mode}_2001/merged_tp_h5/merged_{idx}.h5', 'w') as f:
        f.create_dataset('era5_tp', data=era5_tp)
        f.create_dataset('mswep_tp', data=mswep_tp)

100%|██████████| 80871/80871 [23:17<00:00, 57.86it/s]


### Merge test files

In [4]:
mode = 'test'
filenames = glob.glob(f'../data/{mode}_2001/ERA5_tp/*.nc')
ids = [filename[-9:-3] for filename in filenames]

for idx in tqdm(ids, total=len(ids)):
#     with xr.open_dataset(f'../data/{mode}_2001/ERA5_rh/ERA5_rh_cropped_{idx}.nc') as rh_ds:
#         pass
#     with xr.open_dataset(f'../data/{mode}_2001/ERA5_t/ERA5_t_cropped_{idx}.nc') as t_ds:
#         pass
#     with xr.open_dataset(f'../data/{mode}_2001/ERA5_tcw/ERA5_tcw_cropped_{idx}.nc') as tcw_ds:
#         pass
    with xr.open_dataset(f'../data/{mode}_2001/ERA5_tp/ERA5_tp_cropped_{idx}.nc')['tp'] as tp_ds:
        era5_tp = np.array(era5_tp) * 1000
        era5_tp = np.pad(era5_tp, (2,3), constant_values=0)
        era5_tp = np.expand_dims(era5_tp, axis=-1)
    with xr.open_dataset(f'../data/{mode}_2001/MSWEP_tp/MSWEP_tp_cropped_{idx}.nc')['precipitation'] as mswep_ds:
        mswep_tp = np.array(mswep_ds)
        mswep_tp = np.pad(mswep_tp, (2, 3), constant_values=0)
        mswep_tp = np.expand_dims(mswep_tp, axis=-1)
    with h5py.File(f'../data/{mode}_2001/merged_tp_h5/merged_{idx}.h5', 'w') as f:
        f.create_dataset('era5_tp', data=era5_tp)
        f.create_dataset('mswep_tp', data=mswep_tp)

  0%|          | 0/23321 [00:19<?, ?it/s]


KeyboardInterrupt: 

In [5]:
import tensorflow as tf

filenames = glob.glob(f'../data/train_2001/merged_tp_h5/*.h5')
split_id = int(0.8*len(filenames))
train_filenames = filenames[:split_id]
val_filenames = filenames[split_id:]

# Create a tensorflow dataset from the list of forecast files
train_dataset = tf.data.Dataset.from_tensor_slices(train_filenames)
val_dataset = tf.data.Dataset.from_tensor_slices(val_filenames)

In [10]:
train_dataset

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [11]:
filenames

['../data/train_2001/merged_tp_h5\\merged_120816.h5',
 '../data/train_2001/merged_tp_h5\\merged_120817.h5',
 '../data/train_2001/merged_tp_h5\\merged_120818.h5',
 '../data/train_2001/merged_tp_h5\\merged_120819.h5',
 '../data/train_2001/merged_tp_h5\\merged_120820.h5',
 '../data/train_2001/merged_tp_h5\\merged_120821.h5',
 '../data/train_2001/merged_tp_h5\\merged_120822.h5',
 '../data/train_2001/merged_tp_h5\\merged_120823.h5',
 '../data/train_2001/merged_tp_h5\\merged_120824.h5',
 '../data/train_2001/merged_tp_h5\\merged_120825.h5',
 '../data/train_2001/merged_tp_h5\\merged_120826.h5',
 '../data/train_2001/merged_tp_h5\\merged_120827.h5',
 '../data/train_2001/merged_tp_h5\\merged_120828.h5',
 '../data/train_2001/merged_tp_h5\\merged_120829.h5',
 '../data/train_2001/merged_tp_h5\\merged_120830.h5',
 '../data/train_2001/merged_tp_h5\\merged_120831.h5',
 '../data/train_2001/merged_tp_h5\\merged_120832.h5',
 '../data/train_2001/merged_tp_h5\\merged_120833.h5',
 '../data/train_2001/merged_