In [None]:
import xarray as xr
import numpy as np
import hvplot.xarray
import hvplot as hv
from pathlib import Path

# Select the Bokeh plotting backend. Note that `hv` is the alias this cell's
# imports give to the hvplot package.
hv.extension('bokeh')

In [None]:
import dask
from dask.distributed import Client
import os

# Start a Dask distributed client with 20 single-threaded workers,
# each limited to 12GB of memory, and display it.
cluster_options = dict(n_workers=20, threads_per_worker=1, memory_limit='12GB')
client = Client(**cluster_options)
client

# Pre-processing

ERA5 total precipitation data comes at an hourly time step. This section builds a daily dataset by summing the hourly values within each day, and saves the result as one NetCDF file per year.

In [None]:
# Directory containing the raw hourly ERA5 NetCDF files.
era5_hourly_dir = Path('../../data-precip-analysis/era5/raw')
# Materialise the glob into a sorted list: open_mfdataset is documented to take
# a string or a (nested) sequence of paths, and a list can be inspected or
# reused, whereas the generator returned by glob() is exhausted after one pass.
era5_hourly_fs = sorted(era5_hourly_dir.glob("*.nc"))

# Open all files as a single dataset, combined according to their coordinates.
era5_hourly_ds = xr.open_mfdataset(era5_hourly_fs, combine='by_coords')
# Explicit rechunking, currently disabled:
# era5_hourly_ds = era5_hourly_ds.chunk({'time1': 720, 'lat': 500, 'lon': 500})
era5_hourly_ds

In [None]:
# Optional restriction of the hourly data to a time window.
# Set `subset` to True to keep only [start_date, end_date] along `time1`.
subset = False
start_date, end_date = '2019-01-01', '2023-04-30'

if subset:
    time_window = slice(start_date, end_date)
    era5_hourly_ds = era5_hourly_ds.sel(time1=time_window)

In [None]:
# Aggregate the hourly accumulations into daily totals by summing over
# each 1-day bin of the `time1` axis.
era5_daily = era5_hourly_ds['precipitation_amount_1hour_Accumulation'].resample(time1='1D').sum()

era5_daily = era5_daily.rename({"time1": "time"})

# update attributes
era5_daily.attrs['units'] = 'm/day'
era5_daily.attrs['long_name'] = 'Total daily accumulated precipitation'

# Remove attributes inherited from the hourly source variable. pop(..., None)
# is used instead of `del` so that an attribute that is absent (e.g. a
# different input file or attribute-handling setting) does not abort the cell
# with a KeyError.
for stale_attr in ('nameECMWF', 'shortNameECMWF', 'nameCDM', 'product_type'):
    era5_daily.attrs.pop(stale_attr, None)

# Wrap the array in a dataset under the variable name 'precip' and let the
# chunk sizes be chosen automatically on every dimension.
era5_daily = era5_daily.to_dataset(name='precip')
era5_daily = era5_daily.chunk({"time": "auto", "lat": "auto", "lon": "auto"})
era5_daily

In [None]:
# Output directory for the per-year daily files; created if it does not exist.
era5_daily_dir = Path("../../data-precip-analysis/era5/daily_precip")
era5_daily_dir.mkdir(parents=True, exist_ok=True)

# Split the daily dataset by calendar year, then unzip the (year, dataset)
# pairs into two parallel tuples.
per_year = era5_daily.groupby("time.year")
years, datasets = zip(*per_year)
# One target file per year, named "<year>.nc".
paths = [era5_daily_dir.joinpath(f"{y}.nc") for y in years]

In [None]:
# Preview the first of the per-year datasets produced by the groupby above.
datasets[0]

In [None]:
# Write each yearly dataset to its matching path, compressing the 'precip'
# variable with zlib at level 6.
precip_encoding = {'precip': {'zlib': True, 'complevel': 6}}
xr.save_mfdataset(datasets, paths, encoding=precip_encoding)

# Creating the combined NetCDF dataset

In [None]:
# Chunk sizes for (time, lat, lon) arrays.
CHUNKS_TLL = dict(time=10, lat=500, lon=500)
# Chunk sizes for (lat, lon) arrays.
CHUNKS_LL = dict(lat=50, lon=50)

In [None]:
# Directory of the per-year daily precipitation files.
# NOTE(review): the pre-processing section writes its yearly files under
# "../../data-precip-analysis/era5/daily_precip", while this cell reads from
# "../../data-era5/era5/daily_precip" — confirm the files are expected here.
era5_dir = Path("../../data-era5/era5/daily_precip")
# Sorted list rather than the one-shot generator returned by glob():
# open_mfdataset is documented to take a string or a sequence of paths.
era5_fns = sorted(era5_dir.glob("*.nc"))

# Open all yearly files as one dataset, chunked per CHUNKS_TLL.
era5_ds = xr.open_mfdataset(era5_fns, chunks=CHUNKS_TLL, combine='by_coords')
era5_ds

In [None]:
# Order the dimensions as (time, lat, lon), rechunk, and scale by 1000
# to convert m/day to mm/day.
precip = era5_ds['precip'].transpose('time', 'lat', 'lon').chunk(CHUNKS_TLL) * 1000

# Wrap longitudes into [-180, 180). assign_coords returns a new array instead
# of mutating the coordinate in place, and sortby keeps the longitude axis
# monotonically increasing when the wrap reorders values (e.g. a source grid
# that extends past 180 degrees).
wrapped_lon = (precip.coords['lon'] + 180) % 360 - 180
precip = precip.assign_coords(lon=wrapped_lon).sortby('lon')
precip = precip.astype(np.float64)
precip.attrs['units'] = 'mm/day'
precip.attrs['standard_name'] = 'precipitation_amount'
precip.attrs['long_name'] = 'Total daily accumulated precipitation'

if 'spatial_ref' in precip.coords:
    # drop_vars is the replacement for the deprecated DataArray.drop.
    precip = precip.drop_vars('spatial_ref')
precip

In [None]:
# Interactive lon/lat map of the 2017 data, with one frame per time step.
precip_2017 = precip.sel(time=slice('2017-01-01', '2017-12-31'))
precip_2017.hvplot(groupby='time', x='lon', y='lat')

In [None]:
# Open the existing combined dataset; its lat/lon/time coordinates are used
# below as the target for the ERA5 precipitation.
existing_ds_fn = Path(
    "../../data-cumberland/tennessee/basins/cumberland/pre_processing/nc/combined_data.nc"
)
existing_ds = xr.open_dataset(filename_or_obj=existing_ds_fn)
existing_ds

In [None]:
# Target grid and time span, both taken from the existing dataset.
target_lat, target_lon = existing_ds['lat'], existing_ds['lon']
time_axis = existing_ds['time']
time_window = slice(time_axis.min().values, time_axis.max().values)

# Nearest-neighbour regrid onto the target lat/lon, then keep only the
# time range covered by the existing dataset.
precip = precip.interp(lat=target_lat, lon=target_lon, method='nearest').sel(time=time_window)
precip

In [None]:
# Display the 'precip' variable currently stored in the existing dataset.
existing_ds['precip']

In [None]:
# Build the output dataset: a deep copy of the existing dataset whose
# 'precip' variable is set to the regridded ERA5 precipitation.
new_ds = existing_ds.copy(deep=True).assign(precip=precip)

new_ds

In [None]:
# Plot the existing dataset's precipitation for 2017-01-01.
existing_precip_day = existing_ds['precip'].sel(time='2017-01-01')
existing_precip_day.hvplot()

In [None]:
# Plot the new dataset's precipitation for 2017-01-01, for comparison
# with the existing dataset's map of the same day.
new_precip_day = new_ds['precip'].sel(time='2017-01-01')
new_precip_day.hvplot()

In [None]:
# Write the new dataset to NetCDF, creating the parent directories as needed.
dst_fp = Path('../../data-era5/tennessee/basins/cumberland/pre_processing/nc/combined_data.nc')
dst_dir = dst_fp.parent
dst_dir.mkdir(parents=True, exist_ok=True)
new_ds.to_netcdf(path=dst_fp)