## What this notebook does
- Builds one netCDF file per basin for use with NeuralHydrology's LSTM model

In [1]:
import sys, os
sys.path.append("../src")

from dataio import *
import pandas as pd
import xarray 
from tqdm.notebook import tqdm

In [2]:
def _daily_sum(frame: pd.DataFrame, name: str) -> pd.Series:
    """Sum `frame` across its columns, then total the result per calendar day.

    Returns a Series named `name` with one value per daily bin.
    """
    per_step = frame.sum(axis=1).rename(name)
    return per_step.groupby(pd.Grouper(level=0, freq='D')).sum()


def load_basin_data(basin: int) -> pd.DataFrame:
    """Load the daily input and output series for one basin.

    Parameters
    ----------
    basin : int
        Basin identifier; coerced with ``int()`` before being passed to the
        ``dataio`` loaders.

    Returns
    -------
    pd.DataFrame
        One row per calendar day between the earliest and latest timestamp
        found in any source, with columns ``rain_msm``, ``rain_gauge``,
        ``rain_gsmap``, ``rain_gsmap_gauge``, ``temp_msm``, ``snmlt_te``,
        ``gsnwl_te`` and ``qobs``. Days absent from a source are NaN in that
        column.
    """
    idx = int(basin)

    # Forcings: summed across columns, then totalled per day.
    rain_msm = _daily_sum(load_basin_msm(idx, "rain"), 'rain_msm')
    rain_gauge = _daily_sum(load_rain_gauge(idx), 'rain_gauge')
    # The keyword spelling `gauge_ajdusted` is kept exactly as the notebook
    # had it -- presumably it matches the dataio signature; verify there.
    rain_gsmap = _daily_sum(load_gsmap(idx, gauge_ajdusted=False), 'rain_gsmap')
    rain_gsmap_gauge = _daily_sum(load_gsmap(idx, gauge_ajdusted=True), 'rain_gsmap_gauge')

    # NOTE(review): temperature is *summed* across columns and across each day,
    # exactly as before. A mean looks more plausible for temperature -- confirm
    # what load_basin_msm returns before changing it.
    temp_msm = _daily_sum(load_basin_msm(idx, "temp"), 'temp_msm')

    # Requested with daily=True, so only the column sum is applied here.
    snmlt_te = load_basin_te(idx, var_name="snow_melt", daily=True).sum(axis=1).rename('snmlt_te')
    gsnwl_te = load_basin_te(idx, var_name="snow_amount", daily=True).sum(axis=1).rename('gsnwl_te')

    # Target: the discharge pieces are concatenated and averaged per day.
    y = (
        pd.concat(load_dam_discharge(idx))
        .rename('qobs')
        .groupby(pd.Grouper(level=0, freq='D'))
        .mean()
    )

    df = pd.concat(
        [rain_msm, rain_gauge, rain_gsmap, rain_gsmap_gauge, temp_msm, snmlt_te, gsnwl_te, y],
        axis=1,
    )

    # Put the frame on a gap-free daily index. reindex() returns a new frame,
    # so the result has to be assigned (it was previously discarded).
    start, end = df.index.min(), df.index.max()
    df = df.reindex(pd.date_range(start, end, freq='D'))
    return df

In [3]:
# Per-basin metadata table with boolean quality flags.
# NOTE: read_pickle can execute arbitrary code -- only load a trusted file.
df = pd.read_pickle('data_details.pkl')

# Keep basins that have a network and a gauge, and that neither lack data
# nor are affected by other dams.
idxs = df.loc[
    (df['network exist'] & df['gauge available'])
    & ~(df['lack of data'] | df['other dams'])
].index

In [None]:
# Write one netCDF file per selected basin.
out_dir = './CAMELSJP/time_series'
# to_netcdf does not create missing parent directories, so make sure it exists.
os.makedirs(out_dir, exist_ok=True)

for basin in tqdm(idxs):
    # Use a dedicated name so the metadata table `df` is not overwritten.
    basin_df = load_basin_data(basin)
    basin_df.index.name = 'date'

    # Negative discharge is treated as missing. Series.mask puts NaN wherever
    # the condition holds, without relying on `np`, which this notebook never
    # imports explicitly.
    basin_df['qobs'] = basin_df['qobs'].mask(basin_df['qobs'] < 0)

    # Avoid naming the dataset `xr`, the conventional alias for the xarray module.
    basin_ds = xarray.Dataset.from_dataframe(basin_df)
    basin_ds.to_netcdf(f'{out_dir}/{basin}.nc')