### This notebook reads the consolidated NC files that were written by notebook `Subset_NHM_baselines` for each subbasin extraction, assigns a name to each observation, and writes the names and observations into a single two-column file for PEST++ to read.

In [None]:
import xarray as xr
import pathlib as pl
import numpy as np
import pandas as pd
import pywatershed

### Designate the list of subbasin extractions and the root directory that contains them.

In [None]:
# Subbasin extraction IDs; kept as a list so the workflow can loop over them
# once it is automated.
all_models = [
    '01473000',
    '05431486',
    '09112500',
    '14015000',
]

In [None]:
# Root directory that contains the subbasin extractions (relative path).
rootdir = pl.Path('..').joinpath('NHM_extractions', '20230110_pois_haj')

### For now, working in a single subbasin extraction; this will be automated later.

In [None]:
# Current model: the extraction at index 3 of all_models.
cm = all_models[3]
# This is where the observation files for each extraction were written.
obsdir = rootdir.joinpath(cm, 'observation_data')

In [None]:
# Sorted list of every *.nc observation file for the current extraction, kept
# for checking. sorted() consumes the glob generator directly, so no
# intermediate list is needed.
all_nc_files = sorted((rootdir / cm / 'observation_data').glob('*.nc'))

In [None]:
#all_nc_files #Checks all the subset observation files from the CONUS NHM outputs

In [None]:
# Make a file to hold the consolidated results.
# Mode 'w' deletes any existing file here and recreates it ('a' appends).
# The handle is closed immediately: every later cell reopens the file itself
# in append mode, so keeping this one open would only leak it.
with open(rootdir / cm / 'allobs.dat', 'w', encoding="utf-8") as ofp:
    pass

In [None]:
##  AET  monthly (Note that these values are in inches/day, and a daily average rate for the month--Jacob verified)
cdat = xr.open_dataset(obsdir / 'AET_monthly.nc')

# Observation-name suffixes "<year>_<month>:<nhru>", in time-major sequence
# (nhru varies fastest).
inds = [f'{i.year}_{i.month}:{j}' for i in cdat.indexes['time'] for j in cdat.indexes['nhru']]

# An earlier variant wrote the midpoint (aet_max + aet_min)/2 as a single
# 'actet_mon' observation; it was replaced by the two bound observations below.

# l_max_*: sets the non-penalized condition to less than the max value.
l_max_actet_mon = cdat.aet_max
# g_min_*: sets the non-penalized condition to greater than the min value.
g_min_actet_mon = cdat.aet_min

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    ofp.write('obsname    obsval\n') # writing a header for the file
    for obs_prefix, bound in (('l_max_actet_mon', l_max_actet_mon),
                              ('g_min_actet_mon', g_min_actet_mon)):
        # Put the array in (time, nhru) order before flattening so the values
        # line up with `inds` regardless of how the file stores its dimensions.
        varvals = np.ravel(bound.transpose('time', 'nhru').values, order='C')
        # strict=True raises if the number of names and values ever differ.
        ofp.writelines(f'{obs_prefix}:{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))


In [None]:
#aet_mean_obs
#aet_monthly_obs.sel(time= '2000-01-01') # look at a slice of the netcdf and compare to pest write

In [None]:
##  AET mean monthly
cdat = xr.open_dataset(obsdir / 'AET_mean_monthly.nc')

# Observation-name suffixes "<month>:<nhru>", in month-major sequence
# (nhru varies fastest).
inds = [f'{i}:{j}' for i in cdat.indexes['month'] for j in cdat.indexes['nhru']]

# An earlier variant wrote the midpoint (aet_min + aet_max)/2 as a single
# 'actet_mean_mon' observation; it was replaced by the two bound observations below.

# l_max_*: upper bound taken from aet_max.
l_max_actet_mean_mon = cdat.aet_max
# g_min_*: sets the non-penalized condition to greater than the min value.
g_min_actet_mean_mon = cdat.aet_min

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    for obs_prefix, bound in (('l_max_actet_mean_mon', l_max_actet_mean_mon),
                              ('g_min_actet_mean_mon', g_min_actet_mean_mon)):
        # Put the array in (month, nhru) order before flattening so the values
        # line up with `inds` regardless of how the file stores its dimensions.
        varvals = np.ravel(bound.transpose('month', 'nhru').values, order='C')
        # strict=True raises if the number of names and values ever differ.
        ofp.writelines(f'{obs_prefix}:{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))




In [None]:
#aet_mean_obs.sel(month= 1) 

In [None]:
##  RCH  annual
cdat = xr.open_dataset(obsdir / 'RCH_annual.nc')

# Observation-name suffixes "<year>:<nhru>", in time-major sequence
# (nhru varies fastest).
inds = [f'{i.year}:{j}' for i in cdat.indexes['time'] for j in cdat.indexes['nhru']]

# An earlier variant wrote the midpoint (recharge_max_norm + recharge_min_norm)/2
# as a single 'recharge_ann' observation; it was replaced by the two bound
# observations below.

# Upper (l_max_*) and lower (g_min_*) bounds of normalized recharge.
l_max_recharge_ann = cdat.recharge_max_norm
g_min_recharge_ann = cdat.recharge_min_norm

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    for obs_prefix, bound in (('l_max_recharge_ann', l_max_recharge_ann),
                              ('g_min_recharge_ann', g_min_recharge_ann)):
        # Put the array in (time, nhru) order before flattening so the values
        # line up with `inds` regardless of how the file stores its dimensions.
        varvals = np.ravel(bound.transpose('time', 'nhru').values, order='C')
        # strict=True raises if the number of names and values ever differ.
        ofp.writelines(f'{obs_prefix}:{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))




In [None]:
#recharge_mean_obs.sel(time='2000-01-01')

In [None]:
##  Soil Moisture  monthly
cdat = xr.open_dataset(obsdir / 'Soil_Moisture_monthly.nc')

# Observation-name suffixes "<year>_<month>:<nhru>", in time-major sequence
# (nhru varies fastest).
inds = [f'{i.year}_{i.month}:{j}' for i in cdat.indexes['time'] for j in cdat.indexes['nhru']]

# An earlier variant wrote the midpoint (soil_moist_max_norm + soil_moist_min_norm)/2
# as a single 'soil_moist_mon' observation; it was replaced by the two bound
# observations below.

# Upper (l_max_*) and lower (g_min_*) bounds of normalized soil moisture.
l_max_soil_moist_mon = cdat.soil_moist_max_norm
g_min_soil_moist_mon = cdat.soil_moist_min_norm

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    for obs_prefix, bound in (('l_max_soil_moist_mon', l_max_soil_moist_mon),
                              ('g_min_soil_moist_mon', g_min_soil_moist_mon)):
        # Put the array in (time, nhru) order before flattening so the values
        # line up with `inds` regardless of how the file stores its dimensions.
        varvals = np.ravel(bound.transpose('time', 'nhru').values, order='C')
        # strict=True raises if the number of names and values ever differ.
        ofp.writelines(f'{obs_prefix}:{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))




In [None]:
#soil_moist_mean_obs.sel(time='1982-01-01')

In [None]:
##  Soil_Moisture annual
cdat = xr.open_dataset(obsdir / 'Soil_Moisture_annual.nc')

# Observation-name suffixes "<year>:<nhru>", in time-major sequence
# (nhru varies fastest).
inds = [f'{i.year}:{j}' for i in cdat.indexes['time'] for j in cdat.indexes['nhru']]

# An earlier variant wrote the midpoint (soil_moist_max_norm + soil_moist_min_norm)/2
# as a single 'soil_moist_ann' observation; it was replaced by the two bound
# observations below.

# Upper (l_max_*) and lower (g_min_*) bounds of normalized soil moisture.
l_max_soil_moist_ann = cdat.soil_moist_max_norm
g_min_soil_moist_ann = cdat.soil_moist_min_norm

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    for obs_prefix, bound in (('l_max_soil_moist_ann', l_max_soil_moist_ann),
                              ('g_min_soil_moist_ann', g_min_soil_moist_ann)):
        # Put the array in (time, nhru) order before flattening so the values
        # line up with `inds` regardless of how the file stores its dimensions.
        varvals = np.ravel(bound.transpose('time', 'nhru').values, order='C')
        # strict=True raises if the number of names and values ever differ.
        ofp.writelines(f'{obs_prefix}:{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))




In [None]:
#soil_moist_mean_obs.sel(time='1982-01-01')

In [None]:
# Display the dataset currently bound to `cdat` for a quick visual check.
cdat

In [None]:
##  RUN  monthly (This is an average daily rate in cfs for the month)
cdat = xr.open_dataset(obsdir / 'hru_streamflow_monthly.nc')

# Observation-name suffixes "<year>_<month>:<nhru>", in time-major sequence
# (nhru varies fastest).
inds = [f'{i.year}_{i.month}:{j}' for i in cdat.indexes['time'] for j in cdat.indexes['nhru']]

# An earlier variant wrote cdat.runoff_mwbm as a single 'runoff_mon'
# observation; it was replaced by the two bound observations below.

# Upper (l_max_*) and lower (g_min_*) bounds of runoff.
l_max_runoff_mon = cdat.runoff_max
g_min_runoff_mon = cdat.runoff_min

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    for obs_prefix, bound in (('l_max_runoff_mon', l_max_runoff_mon),
                              ('g_min_runoff_mon', g_min_runoff_mon)):
        # Put the array in (time, nhru) order before flattening so the values
        # line up with `inds` regardless of how the file stores its dimensions.
        varvals = np.ravel(bound.transpose('time', 'nhru').values, order='C')
        # strict=True raises if the number of names and values ever differ.
        ofp.writelines(f'{obs_prefix}:{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))



In [None]:
#cdat.runoff_mwbm.sel(time='1982-01-01')

## The following has NaNs for SCA daily that got rejected by the filter. Need to decide whether to drop them entirely or give them a dummy value (-999) or similar.

In [None]:
##  Snow_covered_area daily
cdat = xr.open_dataset(obsdir / 'SCA_daily.nc')
# Replace NaNs with -9999 so every (day, nhru) entry is still written.
cdat = cdat.fillna(-9999)

# Observation-name suffixes "<year>_<month>_<day>:<nhru>", in time-major
# sequence (nhru varies fastest).
inds = [f'{i.year}_{i.month}_{i.day}:{j}' for i in cdat.indexes['time'] for j in cdat.indexes['nhru']]

# An earlier variant wrote the midpoint (SCA_max + SCA_min)/2 as a single
# 'sca_daily' observation; it was replaced by the two bound observations below.

# Upper (l_max_*) and lower (g_min_*) bounds of snow-covered area.
l_max_sca_daily = cdat.SCA_max
g_min_sca_daily = cdat.SCA_min

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    for obs_prefix, bound in (('l_max_sca_daily', l_max_sca_daily),
                              ('g_min_sca_daily', g_min_sca_daily)):
        # Put the array in (time, nhru) order before flattening so the values
        # line up with `inds` regardless of how the file stores its dimensions.
        varvals = np.ravel(bound.transpose('time', 'nhru').values, order='C')
        # strict=True raises if the number of names and values ever differ.
        ofp.writelines(f'{obs_prefix}:{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))


In [None]:
#SCA_mean_obs.sel(time='2000-02-28')

In [None]:
##  Streamflow daily
###### Warning: You must run the EFC notebook prior to this block to create the new sf file with EFC codes "EFC_netcdf"
# Note: For ease, the start and end dates must be the same as those designated
# in "the Create_pest_model_observation_file."
seg_outflow_start, seg_outflow_end = '2000-01-01', '2010-12-31'

# Open the streamflow file, clip it to the date window, and keep only the
# three variables used by the following cells.
sf_with_efc = xr.open_dataset(rootdir / cm / 'sf_data_with_EFC.nc')
date_window = slice(seg_outflow_start, seg_outflow_end)
cdat = sf_with_efc.sel(time=date_window)[['discharge', 'efc', 'high_low']]

In [None]:
# Tabulate the discharge record and display any rows with negative discharge.
moo = cdat['discharge'].to_dataframe()
is_negative = moo['discharge'] < 0
moo.loc[is_negative]


In [None]:
# Monthly time series: mean of the daily values within each month (an average
# daily rate for the month), skipping NaNs.
# NOTE(review): the lowercase 'm' frequency alias may not be accepted by newer
# pandas/xarray releases -- confirm against the installed versions.
monthly_bins = cdat.resample(time='m')
cdat_monthly = monthly_bins.mean(skipna=True)


In [None]:
# Mean monthly series: average the monthly values by calendar month
# (mean of all jan, feb, mar....), skipping NaNs.
by_calendar_month = cdat_monthly.groupby('time.month')
cdat_mean_monthly = by_calendar_month.mean(skipna=True)

# Only after the averages are computed, replace remaining NaNs with -9999 in
# all three datasets so the fill value never enters a mean.
cdat, cdat_monthly, cdat_mean_monthly = (
    ds.fillna(-9999) for ds in (cdat, cdat_monthly, cdat_mean_monthly)
)

In [None]:
# streamflow_daily is followed by a suffix: "efc"_"high_low" integers
# efc [1, 2, 3, 4, 5] are ['Large flood', 'Small flood', 'High flow pulse', 'Low flow', 'Extreme low flow']
# high_low [1, 2, 3] are ['Low flow', 'Ascending limb', 'Descending limb']

poi_ids = cdat.indexes['poi_id']
times = cdat.indexes['time']

# Flatten each variable once, poi-major (time varies fastest), after forcing
# (poi_id, time) order so the values line up with the names built below.
# This replaces two .sel(...).item() lookups per observation.
flat = {
    name: np.ravel(cdat[name].transpose('poi_id', 'time').values, order='C')
    for name in ('discharge', 'efc', 'high_low')
}

# The per-element int(...) conversion used previously raised on NaN/inf codes;
# keep that loud failure instead of silently casting garbage to integers.
if not (np.isfinite(flat['efc']).all() and np.isfinite(flat['high_low']).all()):
    raise ValueError('efc/high_low contain NaN or infinite values; fill them before writing observations')
efc_codes = flat['efc'].astype(int)
high_low_codes = flat['high_low'].astype(int)

# Observation-name suffixes "_<efc>_<high_low>:<year>_<month>_<day>:<poi_id>".
inds = [
    f'_{efc}_{high_low}:{t.year}_{t.month}_{t.day}:{p}'
    for (p, t), efc, high_low in zip(
        ((p, t) for p in poi_ids for t in times),
        efc_codes,
        high_low_codes,
        strict=True,
    )
]

varvals = flat['discharge']

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    # strict=True raises if the number of names and values ever differ.
    ofp.writelines(f'streamflow_daily{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))

In [None]:
# Now write the monthly streamflow to the pest obs file.
# Observation-name suffixes "<year>_<month>:<poi_id>", poi-major (time varies fastest).
inds = [f'{i.year}_{i.month}:{j}' for j in cdat_monthly.indexes['poi_id'] for i in cdat_monthly.indexes['time']]

# Transpose explicitly to (poi_id, time) instead of relying on whatever
# dimension order the resample step produced. The previous order='F' flatten
# only paired names and values correctly when the dims were (time, poi_id).
varvals = np.ravel(cdat_monthly['discharge'].transpose('poi_id', 'time').values, order='C')

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    # strict=True raises if the number of names and values ever differ.
    ofp.writelines(f'streamflow_mon:{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))

In [None]:
# Observation-name suffixes "<month>:<poi_id>", poi-major (month varies fastest).
inds = [f'{i}:{j}' for j in cdat_mean_monthly.indexes['poi_id'] for i in cdat_mean_monthly.indexes['month']]

# Transpose explicitly to (poi_id, month) instead of relying on whatever
# dimension order the groupby step produced. The previous order='F' flatten
# only paired names and values correctly when the dims were (month, poi_id).
varvals = np.ravel(cdat_mean_monthly['discharge'].transpose('poi_id', 'month').values, order='C')

with open(rootdir / cm / 'allobs.dat', encoding="utf-8", mode='a') as ofp:
    # strict=True raises if the number of names and values ever differ.
    ofp.writelines(f'streamflow_mean_mon:{i}          {j}\n' for i, j in zip(inds, varvals, strict=True))