In [1]:
import xarray as xr
import pathlib as pl
import pandas as pd
import pywatershed
import os
import dask
import numpy as np

In [2]:
# Identifiers of all available model cutouts.
all_models = [
    '01473000',
    '05431486',
    '09112500',
    '14015000',
]

In [3]:
# Directory that contains the model cutouts (relative to the notebook's working directory).
rootdir = pl.Path('../NHM_extractions/20230110_pois_haj/')

In [4]:
#var_output_files = ['hru_actet.nc', 'recharge.nc', 'soil_rechr.nc', 'snowcov_area.nc', 'seg_outflow.nc',]#output files of interest

### Working currently from a single cutout directory

In [5]:
# Select the cutout to process from the list of cutouts.
cm = all_models[0]
# Sets the path to the NHM output folder where the cutout's output files are.
outvardir = rootdir.joinpath(cm, 'output')

In [6]:
# Name of the post-processed model output file that PEST will read.
of_name = "modelobs.dat"

In [7]:
########### Useful code
#indat
#df.index.dtypes
#xr.__version__

In [8]:
#all_nc_file = sorted([i for i in (rootdir/ cm / 'output').glob('model_custom_output.nc')])#Read in the files to check

In [9]:
# Create (or empty) the file that holds the consolidated results.
# The handle is opened in a `with` block so it is closed immediately; a bare
# open() here would stay open until `ofp` is rebound by a later cell.
# The path is built from `of_name` so it always matches the file that the
# writing cells target.
with open(rootdir / cm / of_name, 'w') as ofp:  # 'w' deletes and recreates any existing file; 'a' would append
    pass

In [10]:
# Open the combined custom-output NetCDF file found in the output folder.
modelobsdat = xr.open_dataset(outvardir.joinpath('model_custom_output.nc'))

In [11]:
#modelobsdat

### Slice output to calibration periods for each variable
#### These are as follows from table 2 (Hay and others, 2023):


In [12]:
# Calibration window (start, end) used to slice each output variable.
aet_start, aet_end = '2000-01-01', '2010-12-31'
recharge_start, recharge_end = '2000-01-01', '2009-12-31'
runoff_start, runoff_end = '1982-01-01', '2010-12-31'
soil_rechr_start, soil_rechr_end = '1982-01-01', '2010-12-31'
sca_start, sca_end = '2000-01-01', '2010-12-31'
seg_outflow_start, seg_outflow_end = '2000-01-01', '2010-12-31'

### Actual ET
#### Get and check the daily data

In [13]:
# Daily hru_actet restricted to its calibration window, taken from the combined
# dataset (an earlier version read it from the standalone 'hru_actet.nc' file).
actet_daily = modelobsdat['hru_actet'].sel(time=slice(aet_start, aet_end))

In [14]:
#actet_daily

#### Post-process daily output to match observation targets of "monthly" and "mean monthly"

In [15]:
# Monthly time series: the mean of the daily values within each month
# (i.e. an average daily rate for the month).
# NOTE(review): 'm' is the month-end frequency alias; newer pandas releases
# prefer 'ME' — confirm against the installed pandas/xarray versions.
actet_monthly = actet_daily.resample(time = 'm').mean()

In [16]:
#actet_monthly

In [17]:
# Mean monthly values: groups the monthly series by calendar month and averages
# each group (mean of all Januaries, all Februaries, ...).
actet_mean_monthly = actet_monthly.groupby('time.month').mean()

In [18]:
#actet_mean_monthly

In [19]:
### Used for plotting/checking
#df = actet_monthly.to_dataframe()# makes a dataframe for plotting and writing out
#df_r = np.ravel(actet_monthly_df, order = 'C')# flattens the 2D array to a 1D array--just playing
#df[8465:8473]
#df.xs(5625, level=1).plot()

#### Now write values to the template file

In [20]:
# One observation-name suffix ("year_month:nhm_id") per value, time-major, which
# matches the row-major (order='C') flattening of the values just below.
inds = [
    f'{stamp.year}_{stamp.month}:{nhm}'
    for stamp in actet_monthly.indexes['time']
    for nhm in actet_monthly['nhm_id'].values
]
varvals = np.ravel(actet_monthly, order='C')  # flatten the 2D array to 1D

# Mode 'w' starts the observation file afresh; the header row is written first.
with open(rootdir / cm / of_name, mode='w', encoding="utf-8") as ofp:
    ofp.write('obsname    obsval\n')
    ofp.writelines(
        f'actet_mon:{name}          {val}\n' for name, val in zip(inds, varvals)
    )

In [21]:
# Spot check: show the values of one monthly slice (2000-01-31) so they can be
# compared with the lines written to the PEST file.
actet_monthly.sel(time='2000-01-31').values

array([0.01872298, 0.01335401, 0.02401422, 0.02302729, 0.02104963,
       0.02078296, 0.0102255 , 0.00803491, 0.01745123, 0.01889673,
       0.02125189, 0.01306235, 0.01861553, 0.01775   , 0.01272994,
       0.01571974, 0.01441734])

In [22]:
# Show the (observation suffix, value) pairs exactly as they are paired for writing.
list(zip(inds, varvals))

[('2000_1:5621', 0.018722979149330198),
 ('2000_1:5625', 0.013354014934519418),
 ('2000_1:5628', 0.024014221169636182),
 ('2000_1:5635', 0.02302728590993128),
 ('2000_1:5637', 0.0210496318661405),
 ('2000_1:5643', 0.020782960068698165),
 ('2000_1:5678', 0.010225496307092958),
 ('2000_1:5679', 0.008034910561232384),
 ('2000_1:5686', 0.017451230870733893),
 ('2000_1:5690', 0.018896731326354506),
 ('2000_1:5693', 0.021251885933652865),
 ('2000_1:5697', 0.01306235112462228),
 ('2000_1:5703', 0.01861552880539323),
 ('2000_1:5728', 0.017749996019908548),
 ('2000_1:7128', 0.012729942847455966),
 ('2000_1:7156', 0.015719744154768397),
 ('2000_1:7157', 0.014417340475473217),
 ('2000_2:5621', 0.044012876260611275),
 ('2000_2:5625', 0.03588178741887825),
 ('2000_2:5628', 0.05329765743755632),
 ('2000_2:5635', 0.04928992969088599),
 ('2000_2:5637', 0.04941284607596243),
 ('2000_2:5643', 0.04697932902909509),
 ('2000_2:5678', 0.029800105728549225),
 ('2000_2:5679', 0.025387249882439445),
 ('2000_2:

In [23]:
# Observation-name suffixes ("month:nhm_id"), month-major, paired with the
# row-major (order='C') flattening of the mean-monthly values.
# Assumes the array's dimensions are ordered (month, nhm_id) — TODO confirm.
inds = [
    f'{month}:{nhm}'
    for month in actet_mean_monthly.indexes['month']
    for nhm in actet_mean_monthly['nhm_id'].values
]
varvals = np.ravel(actet_mean_monthly, order='C')  # flatten the 2D array to 1D

# Append to the observation file started by the monthly actual-ET cell.
with open(rootdir / cm / of_name, mode='a', encoding="utf-8") as ofp:
    ofp.writelines(
        f'actet_mean_mon:{name}          {val}\n' for name, val in zip(inds, varvals)
    )

In [24]:
#actet_mean_monthly

In [25]:
#actet_mean_monthly.sel(month=1).values # look at a slice of the netcdf and compare to pest write file

### Post Process recharge for calibration use
#### Get daily output file from NHM for recharge

In [26]:
# Daily recharge restricted to its calibration window, taken from the combined
# dataset (an earlier version read it from the standalone 'recharge.nc' file).
recharge_daily = modelobsdat['recharge'].sel(time=slice(recharge_start, recharge_end))

#### Post-process daily output to match observation target of "annual recharge" as an average daily rate for the year

In [27]:
# Annual series: the mean of the daily values within each year.
# NOTE(review): 'Y' is the year-end frequency alias; newer pandas releases
# prefer 'YE' — confirm against the installed pandas/xarray versions.
recharge_annual = recharge_daily.resample(time = 'Y').mean()
# Min-max scaling to the 0-1 range. min()/max() are called without a dimension,
# so a single minimum and maximum over the whole array are used.
# NOTE(review): confirm a global scaling (rather than one per nhm_id) is what
# the observation targets expect.
recharge_annual_norm = (recharge_annual - recharge_annual.min())/(recharge_annual.max()-recharge_annual.min())

#### Write values to template file

In [28]:
# Observation-name suffixes ("year:nhm_id"), time-major, paired with the
# row-major (order='C') flattening of the normalized annual values.
# Assumes the array's dimensions are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}:{nhm}'
    for stamp in recharge_annual_norm.indexes['time']
    for nhm in recharge_annual_norm['nhm_id'].values
]
varvals = np.ravel(recharge_annual_norm, order='C')  # flatten the 2D array to 1D

# Append to the existing observation file.
with open(rootdir / cm / of_name, mode='a', encoding="utf-8") as ofp:
    ofp.writelines(
        f'recharge_ann:{name}          {val}\n' for name, val in zip(inds, varvals)
    )

In [29]:
# Display the normalized annual recharge for a visual check.
recharge_annual_norm

In [30]:
#recharge_annual_norm.sel().values # look at a slice of the netcdf and compare to pest write file

### Post Process "soil_rechr" to compare to target
#### Get daily output file from NHM for soil recharge and normalize 0-1

In [31]:
# Daily soil_rechr restricted to its calibration window, taken from the combined
# dataset (an earlier version read it from the standalone 'soil_rechr.nc' file).
soil_rechr_daily = modelobsdat['soil_rechr'].sel(time=slice(soil_rechr_start, soil_rechr_end))

In [32]:
# Monthly time series: the mean of the daily values within each month.
# NOTE(review): 'm' / 'Y' are the month-end / year-end frequency aliases; newer
# pandas releases prefer 'ME' / 'YE' — confirm against the installed versions.
soil_rechr_monthly = soil_rechr_daily.resample(time = 'm').mean()
# Min-max scaling to 0-1. min()/max() take no dimension argument, so one minimum
# and one maximum over the whole array are used.
# NOTE(review): confirm a global scaling (rather than one per nhm_id) is intended.
soil_rechr_monthly_norm = (soil_rechr_monthly - soil_rechr_monthly.min())/(soil_rechr_monthly.max()-soil_rechr_monthly.min())

# Annual time series: the mean of the daily values within each year.
soil_rechr_annual = soil_rechr_daily.resample(time = 'Y').mean()
# Same whole-array min-max scaling as for the monthly series.
soil_rechr_annual_norm = (soil_rechr_annual - soil_rechr_annual.min())/(soil_rechr_annual.max()-soil_rechr_annual.min())

In [33]:
# Observation-name suffixes ("year_month:nhm_id"), time-major, paired with the
# row-major (order='C') flattening of the normalized monthly values.
# Assumes the array's dimensions are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}_{stamp.month}:{nhm}'
    for stamp in soil_rechr_monthly_norm.indexes['time']
    for nhm in soil_rechr_monthly_norm['nhm_id'].values
]
varvals = np.ravel(soil_rechr_monthly_norm, order='C')  # flatten the 2D array to 1D

# Append to the existing observation file.
with open(rootdir / cm / of_name, mode='a', encoding="utf-8") as ofp:
    ofp.writelines(
        f'soil_moist_mon:{name}          {val}\n' for name, val in zip(inds, varvals)
    )

In [34]:
# Observation-name suffixes ("year:nhm_id"), time-major, paired with the
# row-major (order='C') flattening of the normalized annual values.
# Assumes the array's dimensions are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}:{nhm}'
    for stamp in soil_rechr_annual_norm.indexes['time']
    for nhm in soil_rechr_annual_norm['nhm_id'].values
]
varvals = np.ravel(soil_rechr_annual_norm, order='C')  # flatten the 2D array to 1D

# Append to the existing observation file.
with open(rootdir / cm / of_name, mode='a', encoding="utf-8") as ofp:
    ofp.writelines(
        f'soil_moist_ann:{name}          {val}\n' for name, val in zip(inds, varvals)
    )

In [35]:
# Display the normalized annual soil_rechr series for a visual check.
soil_rechr_annual_norm

In [36]:
#soil_rechr_monthly_norm.sel(time='1982-01-31').values # look at a slice of the netcdf and compare to pest write file
#soil_rechr_annual_norm.sel(time='1982-12-31').values # look at a slice of the netcdf and compare to pest write file

In [37]:
#actet_mean_monthly.sel(month=1).values # look at a slice of the netcdf and compare to pest write file

### Post Process "hru_streamflow_out" to compare to target
#### Get and check the daily data

In [38]:
# Daily hru_streamflow_out restricted to the runoff calibration window.
# Per the original author's note, the units are cubic feet (implied per day).
hru_streamflow_out_daily = modelobsdat['hru_streamflow_out'].sel(
    time=slice(runoff_start, runoff_end)
)

In [39]:
# Monthly time series: the mean of the daily values within each month.
# NOTE(review): 'm' is the month-end frequency alias; newer pandas releases
# prefer 'ME' — confirm against the installed pandas/xarray versions.
hru_streamflow_out_monthly = hru_streamflow_out_daily.resample(time = 'm').mean()

In [40]:
# Convert the average daily rate to a per-second rate (cubic feet per second,
# per the original author's note) by dividing by the 86,400 seconds in a day,
# so it can be compared to the observation.
hru_streamflow_out_rate = hru_streamflow_out_monthly / 86_400

In [41]:
# Observation-name suffixes ("year_month:nhm_id"), time-major, paired with the
# row-major (order='C') flattening of the monthly rates.
# Assumes the array's dimensions are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}_{stamp.month}:{nhm}'
    for stamp in hru_streamflow_out_rate.indexes['time']
    for nhm in hru_streamflow_out_rate['nhm_id'].values
]
varvals = np.ravel(hru_streamflow_out_rate, order='C')  # flatten the 2D array to 1D

# Append to the existing observation file.
with open(rootdir / cm / of_name, mode='a', encoding="utf-8") as ofp:
    ofp.writelines(
        f'runoff_mon:{name}          {val}\n' for name, val in zip(inds, varvals)
    )

In [42]:
#hru_streamflow_out_rate

In [43]:
#hru_streamflow_out_rate.sel(time='1982-01-31T00:00:00.000000000').values # look at a slice of the netcdf and compare to pest write file

### Post Process "snowcov_area" to compare to target
#### Get and check the daily data

In [44]:
# Daily snowcov_area restricted to its calibration window, taken from the combined
# dataset (an earlier version read it from the standalone 'snowcov_area.nc' file).
snowcov_area_daily = modelobsdat['snowcov_area'].sel(time=slice(sca_start, sca_end))
# Switch for the filter that removes July and August from the dataset.
remove_ja = True

In [45]:
# Optionally remove July and August from the daily series so it has the same
# size as the observed data; otherwise pass the series through unchanged.
if remove_ja:
    snowcov_area_daily_restr = snowcov_area_daily.sel(
        time=snowcov_area_daily.time.dt.month.isin([1, 2, 3, 4, 5, 6, 9, 10, 11, 12])
    )
else:
    snowcov_area_daily_restr = snowcov_area_daily
# snowcov_area_daily is intentionally NOT closed here: in the `else` branch it is
# the very object the next cell reads, and its data come from the shared
# `modelobsdat` dataset that later cells still use.

In [46]:
# Observation-name suffixes ("year_month_day:nhm_id"), time-major, paired with
# the row-major (order='C') flattening of the filtered daily values.
# Assumes the array's dimensions are ordered (time, nhm_id) — TODO confirm.
inds = [
    f'{stamp.year}_{stamp.month}_{stamp.day}:{nhm}'
    for stamp in snowcov_area_daily_restr.indexes['time']
    for nhm in snowcov_area_daily_restr['nhm_id'].values
]
varvals = np.ravel(snowcov_area_daily_restr, order='C')  # flatten the 2D array to 1D

# Append to the existing observation file.
with open(rootdir / cm / of_name, mode='a', encoding="utf-8") as ofp:
    ofp.writelines(
        f'sca_daily:{name}          {val}\n' for name, val in zip(inds, varvals)
    )

### Get the daily streamflow values from segments associated with the gage pois

In [47]:
# Daily seg_outflow restricted to its calibration window, taken from the combined
# dataset (an earlier version read 'seg_outflow.nc' and also selected segments
# by nhm_seg).
seg_outflow_daily = modelobsdat['seg_outflow'].sel(
    time=slice(seg_outflow_start, seg_outflow_end)
)

In [48]:
# Display the daily segment outflow selection for a visual check.
seg_outflow_daily

In [49]:
# Observation-name suffixes ("year_month_day:gage"), gage-major (all dates for
# the first gage, then the next gage, ...). The values are flattened in
# column-major (order='F') order to follow the same sequence.
# Assumes the array's dimensions are ordered (time, gage) — TODO confirm.
inds = [
    f'{stamp.year}_{stamp.month}_{stamp.day}:{gage}'
    for gage in seg_outflow_daily['poi_gages'].values
    for stamp in seg_outflow_daily.indexes['time']
]
varvals = np.ravel(seg_outflow_daily, order='F')  # flatten the 2D array to 1D

# Append to the existing observation file.
with open(rootdir / cm / of_name, mode='a', encoding="utf-8") as ofp:
    ofp.writelines(
        f'streamflow_daily:{name}          {val}\n' for name, val in zip(inds, varvals)
    )

In [50]:
# Display the daily segment outflow again, after writing, for comparison with the file.
seg_outflow_daily

In [51]:
#seg_outflow_daily.sel(npoi_gages='01472198').values # look at a slice of the netcdf and compare to pest write file

#### Post-process daily output to match observation targets of "monthly" and "mean monthly"

In [52]:
# Monthly time series: the mean of the daily values within each month
# (i.e. an average daily rate for the month).
# NOTE(review): 'm' is the month-end frequency alias; newer pandas releases
# prefer 'ME' — confirm against the installed pandas/xarray versions.
seg_outflow_monthly = seg_outflow_daily.resample(time = 'm').mean()

In [53]:
# Mean monthly values: groups the monthly series by calendar month and averages
# each group (mean of all Januaries, all Februaries, ...).
seg_outflow_mean_monthly = seg_outflow_monthly.groupby('time.month').mean()

In [54]:
# Write the monthly streamflow to the PEST observation file.
# Observation-name suffixes ("year_month:gage") are gage-major, and the values
# are flattened in column-major (order='F') order to follow the same sequence.
# Assumes the array's dimensions are ordered (time, gage) — TODO confirm.
inds = [
    f'{stamp.year}_{stamp.month}:{gage}'
    for gage in seg_outflow_monthly['poi_gages'].values
    for stamp in seg_outflow_monthly.indexes['time']
]
varvals = np.ravel(seg_outflow_monthly, order='F')  # flatten the 2D array to 1D

# Append to the existing observation file.
with open(rootdir / cm / of_name, mode='a', encoding="utf-8") as ofp:
    ofp.writelines(
        f'streamflow_mon:{name}          {val}\n' for name, val in zip(inds, varvals)
    )

In [55]:
# Observation-name suffixes ("month:gage"), gage-major, with the values flattened
# in column-major (order='F') order to follow the same sequence.
# Assumes the array's dimensions are ordered (month, gage) — TODO confirm.
inds = [
    f'{month}:{gage}'
    for gage in seg_outflow_mean_monthly['poi_gages'].values
    for month in seg_outflow_mean_monthly.indexes['month']
]
varvals = np.ravel(seg_outflow_mean_monthly, order='F')  # flatten the 2D array to 1D

# Append to the existing observation file.
with open(rootdir / cm / of_name, mode='a', encoding="utf-8") as ofp:
    ofp.writelines(
        f'streamflow_mean_mon:{name}          {val}\n' for name, val in zip(inds, varvals)
    )