In [None]:
import xarray as xr
import pathlib as pl
import pandas as pd
import pywatershed
import os
import dask
import numpy as np

In [None]:
# List of all cutouts that can be post-processed.
all_models = [
    '01473000',
    '05431486',
    '09112500',
    '14015000',
]

In [None]:
# Path to the directory that holds one sub-folder per cutout.
rootdir = pl.Path(
    '../NHM_extractions/20230110_pois_haj/'
)

In [None]:
# Output files of interest.
var_output_files = [
    'hru_actet.nc',
    'recharge.nc',
    'soil_rechr.nc',
    'snowcov_area.nc',
    'seg_outflow.nc',
]

### Working currently from a single cutout directory

In [None]:
# Select the cutout to process: the first entry of all_models.
cm = all_models[0]
# Path to this cutout's NHM output folder, where the output files are read from.
outvardir = rootdir / cm / 'output'

In [None]:
# Name of the post-processed model output file that PEST will read.
# It is written under rootdir / cm by the cells that follow.
of_name = 'alloutput.dat'

In [None]:
########### Useful code
#indat
#df.index.dtypes
#xr.__version__

### Slice output to calibration periods for each variable
#### The calibration periods below are taken from Table 2 of Hay and others (2023):


In [None]:
# Calibration window (inclusive start/end dates, 'YYYY-MM-DD') for each output variable.
aet_start, aet_end = '2000-01-01', '2010-12-31'                # actual ET
recharge_start, recharge_end = '2000-01-01', '2009-12-31'      # recharge
runoff_start, runoff_end = '1982-01-01', '2010-12-31'          # runoff
soil_rechr_start, soil_rechr_end = '1982-01-01', '2010-12-31'  # soil_rechr
sca_start, sca_end = '2000-01-01', '2010-12-31'                # snow-covered area

### Actual ET
#### Get and check the daily data

In [None]:
# Read the 'hru_actet' variable from the NHM output and keep only the
# actual-ET calibration window.
actet_daily = (
    xr.open_dataset(outvardir / 'hru_actet.nc')['hru_actet']
    .sel(time=slice(aet_start, aet_end))
)

#### Post-process daily output to match observation targets of "monthly" and "mean monthly"

In [None]:
# Monthly time series: the mean of the daily values within each month
# (i.e. an average daily rate for the month). The result is an xarray object,
# not a pandas DataFrame.
# NOTE(review): 'm' is a lowercase frequency alias; newer pandas releases spell
# month-end as 'ME' — confirm against the pinned pandas/xarray versions.
actet_monthly = (
    actet_daily
    .resample(time='m')
    .mean()
)

In [None]:
#actet_monthly

In [None]:
# Mean-monthly series: average the monthly values by calendar month
# (mean of all Jan, all Feb, ...). Grouping on 'time.month' yields a 'month'
# coordinate in place of 'time'.
actet_mean_monthly = (
    actet_monthly
    .groupby('time.month')
    .mean()
)

In [None]:
### Used for plotting/checking
#df = actet_monthly.to_dataframe()# makes a dataframe for plotting and writing out
#df_r = np.ravel(actet_monthly_df, order = 'C')# flattens the 2D array to a 1D array--just playing
#df[8465:8473]
#df.xs(5625, level=1).plot()

#### Now write values to the template file

In [None]:
# Build one "<year>_<month>:<nhm_id>" label per value, iterating time first and
# nhm_id second so the labels line up with the row-major (C-order) flattening.
# NOTE(review): assumes actet_monthly dims are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}_{stamp.month}:{nhm_id}'
    for stamp in actet_monthly.indexes['time']
    for nhm_id in actet_monthly.indexes['nhm_id']
]
varvals = np.ravel(actet_monthly, order='C')  # flatten the array to 1D

# mode='w' starts a fresh output file; the later sections append to it.
# writelines() streams the records instead of building a throwaway list of
# write() return values.
with open(rootdir / cm / of_name, encoding="utf-8", mode='w') as ofp:
    ofp.writelines(
        f'actet_mon:{label}          {value}\n' for label, value in zip(inds, varvals)
    )

In [None]:
# Build one "<month>:<nhm_id>" label per value, iterating month first and
# nhm_id second so the labels line up with the row-major (C-order) flattening.
# NOTE(review): assumes actet_mean_monthly dims are ordered (month, nhm_id) — TODO confirm.
inds = [
    f'{month}:{nhm_id}'
    for month in actet_mean_monthly.indexes['month']
    for nhm_id in actet_mean_monthly.indexes['nhm_id']
]
varvals = np.ravel(actet_mean_monthly, order='C')  # flatten the array to 1D

# Append to the existing output file; writelines() streams the records instead
# of building a throwaway list of write() return values.
with open(rootdir / cm / of_name, encoding="utf-8", mode='a') as ofp:
    ofp.writelines(
        f'actet_mean_mon:{label}          {value}\n' for label, value in zip(inds, varvals)
    )

### Post Process recharge for calibration use
#### Get daily output file from NHM for recharge

In [None]:
# Read the 'recharge' variable from the NHM output and keep only the
# recharge calibration window.
recharge_daily = (
    xr.open_dataset(outvardir / 'recharge.nc')['recharge']
    .sel(time=slice(recharge_start, recharge_end))
)

#### Post-process daily output to match the observation target of "annual recharge": an average daily rate for each year, then min-max normalized to 0-1

In [None]:
# Annual series: the mean of the daily values within each year.
recharge_annual = (
    recharge_daily
    .resample(time='Y')
    .mean()
)
# Min-max scale to the 0-1 range.
# NOTE(review): min()/max() are called without a dim, so they reduce over every
# dimension and the scaling is global (all years and all nhm_ids together) —
# confirm this matches how the observation targets were normalized.
recharge_annual_norm = (recharge_annual - recharge_annual.min()) / (
    recharge_annual.max() - recharge_annual.min()
)

#### Write values to template file

In [None]:
# Build one "<year>:<nhm_id>" label per value, iterating time first and nhm_id
# second so the labels line up with the row-major (C-order) flattening.
# NOTE(review): assumes recharge_annual_norm dims are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}:{nhm_id}'
    for stamp in recharge_annual_norm.indexes['time']
    for nhm_id in recharge_annual_norm.indexes['nhm_id']
]
varvals = np.ravel(recharge_annual_norm, order='C')  # flatten the array to 1D

# Append to the existing output file; writelines() streams the records instead
# of building a throwaway list of write() return values.
with open(rootdir / cm / of_name, encoding="utf-8", mode='a') as ofp:
    ofp.writelines(
        f'recharge_ann:{label}          {value}\n' for label, value in zip(inds, varvals)
    )

### Post Process "soil_rechr" to compare to target
#### Get daily output file from NHM for soil recharge and normalize 0-1

In [None]:
# Read the 'soil_rechr' variable from the NHM output and keep only the
# soil_rechr calibration window.
soil_rechr_daily = (
    xr.open_dataset(outvardir / 'soil_rechr.nc')['soil_rechr']
    .sel(time=slice(soil_rechr_start, soil_rechr_end))
)

In [None]:
# Monthly series: the mean of the daily values within each month, then
# min-max scaled to 0-1.
# NOTE(review): min()/max() are called without a dim, so the scaling is global
# over all time steps and nhm_ids — confirm this matches the observation targets.
soil_rechr_monthly = (
    soil_rechr_daily
    .resample(time='m')
    .mean()
)
soil_rechr_monthly_norm = (soil_rechr_monthly - soil_rechr_monthly.min()) / (
    soil_rechr_monthly.max() - soil_rechr_monthly.min()
)

# Annual series: the mean of the daily values within each year, scaled the same way.
soil_rechr_annual = (
    soil_rechr_daily
    .resample(time='Y')
    .mean()
)
soil_rechr_annual_norm = (soil_rechr_annual - soil_rechr_annual.min()) / (
    soil_rechr_annual.max() - soil_rechr_annual.min()
)

In [None]:
# Build one "<year>_<month>:<nhm_id>" label per value, iterating time first and
# nhm_id second so the labels line up with the row-major (C-order) flattening.
# NOTE(review): assumes soil_rechr_monthly_norm dims are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}_{stamp.month}:{nhm_id}'
    for stamp in soil_rechr_monthly_norm.indexes['time']
    for nhm_id in soil_rechr_monthly_norm.indexes['nhm_id']
]
varvals = np.ravel(soil_rechr_monthly_norm, order='C')  # flatten the array to 1D

# Append to the existing output file; writelines() streams the records instead
# of building a throwaway list of write() return values.
with open(rootdir / cm / of_name, encoding="utf-8", mode='a') as ofp:
    ofp.writelines(
        f'soil_moist_mon:{label}          {value}\n' for label, value in zip(inds, varvals)
    )

In [None]:
# Build one "<year>:<nhm_id>" label per value, iterating time first and nhm_id
# second so the labels line up with the row-major (C-order) flattening.
# NOTE(review): assumes soil_rechr_annual_norm dims are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}:{nhm_id}'
    for stamp in soil_rechr_annual_norm.indexes['time']
    for nhm_id in soil_rechr_annual_norm.indexes['nhm_id']
]
varvals = np.ravel(soil_rechr_annual_norm, order='C')  # flatten the array to 1D

# Append to the existing output file; writelines() streams the records instead
# of building a throwaway list of write() return values.
with open(rootdir / cm / of_name, encoding="utf-8", mode='a') as ofp:
    ofp.writelines(
        f'soil_moist_ann:{label}          {value}\n' for label, value in zip(inds, varvals)
    )

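### Post Process "hru_outflow" to compare to target
#### Get the daily `gwres_flow_vol`, `sroff_vol`, and `ssres_flow_vol` output, which are summed below as a stand-in for `hru_outflow`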

In [None]:
# Daily flow-volume components, each restricted to the runoff calibration window.
# Units are cubic feet (implied per day).
# The three components are used as a stand-in for 'hru_outflow' for now.
gwres_flow_daily = (
    xr.open_dataset(outvardir / 'gwres_flow_vol.nc')['gwres_flow_vol']
    .sel(time=slice(runoff_start, runoff_end))
)
sroff_daily = (
    xr.open_dataset(outvardir / 'sroff_vol.nc')['sroff_vol']
    .sel(time=slice(runoff_start, runoff_end))
)
ssres_flow_daily = (
    xr.open_dataset(outvardir / 'ssres_flow_vol.nc')['ssres_flow_vol']
    .sel(time=slice(runoff_start, runoff_end))
)

In [None]:
# Average daily rate for each month: the mean of the daily values within each
# month, computed the same way for all three components.
gwres_flow_monthly, sroff_monthly, ssres_flow_monthly = (
    component.resample(time='m').mean()
    for component in (gwres_flow_daily, sroff_daily, ssres_flow_daily)
)

In [None]:
# Sum the three monthly components and convert the average daily rate to cubic
# feet per second (for comparison with observations) by dividing by the number
# of seconds in a day.
hru_outflow_rate = (
    gwres_flow_monthly + sroff_monthly + ssres_flow_monthly
) / (24 * 60 * 60)

In [None]:
# Build one "<year>_<month>:<nhm_id>" label per value, iterating time first and
# nhm_id second so the labels line up with the row-major (C-order) flattening.
# NOTE(review): assumes hru_outflow_rate dims are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}_{stamp.month}:{nhm_id}'
    for stamp in hru_outflow_rate.indexes['time']
    for nhm_id in hru_outflow_rate.indexes['nhm_id']
]
varvals = np.ravel(hru_outflow_rate, order='C')  # flatten the array to 1D

# Append to the existing output file; writelines() streams the records instead
# of building a throwaway list of write() return values.
with open(rootdir / cm / of_name, encoding="utf-8", mode='a') as ofp:
    ofp.writelines(
        f'runoff_mon:{label}          {value}\n' for label, value in zip(inds, varvals)
    )

### Post Process "snowcov_area" to compare to target
#### Get and check the daily data

In [None]:
# Read the 'snowcov_area' variable from the NHM output and keep only the
# snow-covered-area calibration window.
snowcov_area_daily = (
    xr.open_dataset(outvardir / 'snowcov_area.nc')['snowcov_area']
    .sel(time=slice(sca_start, sca_end))
)
# Switch for the filter that removes July and August from the dataset.
remove_ja = True

In [None]:
# When remove_ja is set, keep every month except July (7) and August (8) so the
# series is the same size as the observation data; otherwise use the series as is.
snowcov_area_daily_restr = (
    snowcov_area_daily.sel(
        time=snowcov_area_daily.time.dt.month.isin([1, 2, 3, 4, 5, 6, 9, 10, 11, 12])
    )
    if remove_ja
    else snowcov_area_daily
)
# NOTE(review): close() is called before the restricted values are read by the
# next cell — confirm the data is still readable afterwards (or load it first).
snowcov_area_daily.close()

In [None]:
# Build one "<year>_<month>_<day>:<nhm_id>" label per value, iterating time first
# and nhm_id second so the labels line up with the row-major (C-order) flattening.
# NOTE(review): assumes snowcov_area_daily_restr dims are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}_{stamp.month}_{stamp.day}:{nhm_id}'
    for stamp in snowcov_area_daily_restr.indexes['time']
    for nhm_id in snowcov_area_daily_restr.indexes['nhm_id']
]
varvals = np.ravel(snowcov_area_daily_restr, order='C')  # flatten the array to 1D

# Append to the existing output file; writelines() streams the records instead
# of building a throwaway list of write() return values.
with open(rootdir / cm / of_name, encoding="utf-8", mode='a') as ofp:
    ofp.writelines(
        f'sca_daily:{label}          {value}\n' for label, value in zip(inds, varvals)
    )