In [None]:
import xarray as xr
import pathlib as pl
import numpy as np
import pandas as pd
import pynhm

### Define the model cutouts to process

In [None]:
# Names of the model cutouts to process; each one is also the name of a
# subdirectory under the extraction root directory.
all_models = [
    '01473000',
    '05431486',
    '09112500',
    '14015000',
]

In [None]:
# Root of the NHM extraction; it is joined with each name in `all_models`
# to locate that cutout's files.
rootdir = pl.Path('../NHM_extractions/20230110_pois_haj/')

### Make an `observation_data` directory in each extraction directory

In [None]:
# Make sure every cutout has an `observation_data` directory to receive the
# netCDF files written later in the notebook.
# `exist_ok=True` replaces the separate exists() check, so the cell is safe to
# re-run. `parents` is intentionally left off: a missing extraction directory
# should still raise instead of being silently created.
for cm in all_models:
    (rootdir / cm / 'observation_data').mkdir(exist_ok=True)

### now grab all the `nhm_ids` from the `myparam.param` file for each cutout

In [None]:
# Map each cutout name to the `nhm_id` entry read from that cutout's
# `myparam.param` parameter file.
nhm_ids = {
    cm: pynhm.PrmsParameters.load(rootdir / cm / 'myparam.param').parameters['nhm_id']
    for cm in all_models
}

In [None]:
# Display the cutout -> nhm_id mapping for a visual check.
nhm_ids

### assign `wkdir` to indicate where the raw CONUS netCDF files live

In [None]:
# Directory searched for the baseline netCDF files; the relative path is
# resolved against the notebook's working directory.
wkdir = pl.Path('../../')

In [None]:
# Lookup table of target and output variables; the first CSV column is used
# as the index. The trailing expression displays the table.
lu = pd.read_csv(
    '../Supporting_information/target_and_output_vars_table.csv',
    index_col=0,
)
lu

In [None]:
# List the netCDF files available directly inside `wkdir`.
list(wkdir.glob('*.nc'))

### Handle the AET (actual evapotranspiration) observations first

In [None]:
# Open the baseline AET dataset from `wkdir` and display its summary.
AET_all = xr.open_dataset(wkdir.joinpath('baseline_AET_v11.nc'))
AET_all

In [None]:
# For every cutout, subset the AET dataset to that cutout's HRU ids and write
# two files into its `observation_data` directory:
#   * AET_monthly.nc      - the `aet_max` / `aet_min` variables as-is
#   * AET_mean_monthly.nc - means grouped by calendar month
for cm, c_ids in nhm_ids.items():
    obs_dir = rootdir / cm / 'observation_data'
    c_da = AET_all.sel(nhru=c_ids)

    aet_bounds = c_da[['aet_max', 'aet_min']]
    aet_bounds.to_netcdf(obs_dir / 'AET_monthly.nc')

    # NOTE: the calendar-month mean is taken over every variable in the
    # subset, not only the two bounds written above.
    aet_month_mean = c_da.groupby('time.month').mean()
    aet_month_mean.to_netcdf(obs_dir / 'AET_mean_monthly.nc')

### Runoff

In [None]:
# Open the baseline runoff dataset from `wkdir` and display its summary.
RUN_all = xr.open_dataset(wkdir.joinpath('baseline_RUN_v11.nc'))
RUN_all

In [None]:
# For every cutout, subset runoff to that cutout's HRU ids and to the
# 1982-01-01 .. 2010-01-01 time window, then write the three runoff
# variables to RUN_monthly.nc in its `observation_data` directory.
for cm, c_ids in nhm_ids.items():
    obs_dir = rootdir / cm / 'observation_data'
    c_da = RUN_all.sel(
        nhru=c_ids,
        time=slice('1982-01-01', '2010-01-01'),
    )
    runoff_vars = c_da[['runoff_mwbm', 'runoff_min', 'runoff_max']]
    runoff_vars.to_netcdf(obs_dir / 'RUN_monthly.nc')


### Recharge (annual)
### QUESTION: should the normalized recharge bounds really be summed to annual values?

In [None]:
# Open the baseline recharge dataset from `wkdir` and display its summary.
RCH_all = xr.open_dataset(wkdir.joinpath('baseline_RCH_v11.nc'))
RCH_all

In [None]:
# For every cutout, subset recharge to that cutout's HRU ids, aggregate the
# normalized min/max variables into '1Y' resampling bins by summation, and
# write the result to RCH_annual.nc.
# NOTE(review): whether a sum is the right aggregation here is still an open
# question - confirm before relying on these values.
for cm, c_ids in nhm_ids.items():
    obs_dir = rootdir / cm / 'observation_data'
    c_da = RCH_all.sel(nhru=c_ids)
    recharge_bounds = c_da[['recharge_min_norm', 'recharge_max_norm']]
    recharge_annual = recharge_bounds.resample(time='1Y').sum()
    recharge_annual.to_netcdf(obs_dir / 'RCH_annual.nc')


### soil moisture --- annual

In [None]:
# Open the baseline annual soil-moisture dataset from `wkdir` and display its summary.
SOM_ann_all = xr.open_dataset(wkdir.joinpath('baseline_SOMann_v11.nc'))
SOM_ann_all

In [None]:
# For every cutout, subset the annual soil-moisture dataset to that cutout's
# HRU ids and write the normalized min/max variables to Soil_Moisture_annual.nc.
for cm, c_ids in nhm_ids.items():
    obs_dir = rootdir / cm / 'observation_data'
    c_da = SOM_ann_all.sel(nhru=c_ids)
    som_bounds = c_da[['soil_moist_min_norm', 'soil_moist_max_norm']]
    som_bounds.to_netcdf(obs_dir / 'Soil_Moisture_annual.nc')


### soil moisture --- monthly

In [None]:
# Open the baseline *monthly* soil-moisture dataset and display its summary.
# This cell previously re-opened the annual file ('baseline_SOMann_v11.nc'),
# so the "monthly" output written from SOM_mon_all was a copy of the annual data.
# NOTE(review): 'baseline_SOMmth_v11.nc' is the expected name of the monthly
# file - confirm it against the `wkdir.glob('*.nc')` listing.
SOM_mon_all = xr.open_dataset(wkdir / 'baseline_SOMmth_v11.nc')
SOM_mon_all

In [None]:
# For every cutout, subset `SOM_mon_all` to that cutout's HRU ids and write the
# normalized min/max variables to Soil_Moisture_monthly.nc.
for cm, c_ids in nhm_ids.items():
    obs_dir = rootdir / cm / 'observation_data'
    c_da = SOM_mon_all.sel(nhru=c_ids)
    som_bounds = c_da[['soil_moist_min_norm', 'soil_moist_max_norm']]
    som_bounds.to_netcdf(obs_dir / 'Soil_Moisture_monthly.nc')


### notes from Parker about snow cover calculations

In [None]:
# --- Reference only: every line in this cell is commented out; nothing executes. ---
# Excerpt kept as a record of how the snow cover (SCA) mask and the lower/upper
# SCA bounds were computed. Names such as `fbc`, `baseline_file`, `sca_var`,
# `ci_var`, `st_date`, `en_date` and `remove_ja` are not defined in the visible
# part of this notebook, so the excerpt cannot be run as-is.
# def get_dataset(filename, f_vars, start_date, end_date):
#     # This routine assumes dimension nhru exists and variable nhm_id exists
#     df = xr.open_dataset(filename)
#     # NOTE: Next line needed if nhm_id variable exists in netcdf file
#     df = df.assign_coords(nhru=df.nhm_id)
#     if isinstance(f_vars, list):
#         df = df[f_vars].sel(time=slice(start_date, end_date))
#     else:
#         df = df[[f_vars]].sel(time=slice(start_date, end_date))
#     return df
# baseline_df = fbc.get_dataset(baseline_file, [sca_var, ci_var, 'nhru'], st_date, en_date)
#     # TODO: 2021-05-05 PAN - Need to check we got the date range we requested
#     if remove_ja:
#         # Remove July and August from the dataset
#         baseline_restr = baseline_df.sel(time=baseline_df.time.dt.month.isin([1, 2, 3, 4, 5, 6, 9, 10, 11, 12]))
#     else:
#         baseline_restr = baseline_df
#     # Create the SCAmask
#     # Compute lower and upper SCA values based on confidence interval
#     threshold = 70.0
#     ci_pct = baseline_restr[ci_var].where(baseline_restr[ci_var] >= threshold)
#     ci_pct /= 100.0
#     # Mask SCA values where CI is masked
#     sca_obs = baseline_restr[sca_var].where(~np.isnan(ci_pct))
#     # Maximum SCA value by HRU
#     msk_SCAmax = sca_obs.max(axis=0)
#     # Number of daily values > 0.0 by HRU
#     msk_num_obs = (sca_obs > 0.0).sum(axis=0)
#     # Number of years of values by HRU
#     msk_num_ann = sca_obs.resample(time='1AS').mean()
#     msk_num_ann = (msk_num_ann > 0).sum(axis=0)
#     # Create SCA mask based on number of years, SCAmax > 0.5, and total number of observations by HRU
#     SCAmask = (msk_num_ann > 1) & (msk_SCAmax > 0.5) & (msk_num_obs > 9)
#     # Lower bound of SCA by HRU
#     baseline_SCAmin = (ci_pct * sca_obs).where(SCAmask)
#     # Upper bound of SCA by HRU
#     baseline_SCAmax = (baseline_SCAmin + (1.0 - ci_pct)).where(SCAmask)

In [None]:
# Open the baseline snow-cover dataset from `wkdir` and display its summary.
SCA = xr.open_dataset(wkdir.joinpath('baseline_SCA_v11.nc'))
SCA

In [None]:
# Unused draft (commented out, nothing executes): an all-zero DataArray named
# 'snow_cover_extent_weights' with the same shape as `SCA.snow_cover_extent`
# and the `time` / `nhru` coordinates taken from `SCA`.
# weights = xr.DataArray(np.zeros(SCA.snow_cover_extent.shape),
#                   dims=['time', 'nhru'],
#                   coords={'time': SCA.time.data,
#                          'nhru': SCA.nhru.data},
#                       name='snow_cover_extent_weights')
