In [1]:
import pywatershed
import pandas as pd
from pathlib import Path as pl
import json
import numpy as np
from pywatershed.parameters.prms_parameters import JSONParameterEncoder

import sys
sys.path.append('../scripts/')
from pest_utils import pars_to_tpl_entries
sys.path.append('../dependencies/')
import pyemu



## Functions
### Creates a function that writes the parameter file as a json file

In [2]:
def write_to_json_tpl(dims, pars, json_filename):
    """Write dimensions and parameters to a JSON-formatted PEST template file.

    The output file consists of the header line ``ptf ~`` followed by the
    JSON serialization of ``{**dims, **pars}`` (4-space indent, encoded with
    ``JSONParameterEncoder``).  Every ``"~`` and ``~"`` sequence in the JSON
    text is reduced to a bare ``~`` so that ``~``-delimited entries are not
    wrapped in JSON string quotes.

    Parameters
    ----------
    dims : mapping
        Dimension names and values.
    pars : mapping
        Parameter names and values; on a key clash these override ``dims``.
    json_filename : str or path-like
        Destination file; overwritten if it already exists.
    """
    import io  # local import: only this helper needs an in-memory text buffer

    # Serialize in memory so the file is written exactly once and no
    # read-back handle is left open.
    buffer = io.StringIO()
    json.dump(
        {**dims, **pars},
        buffer,
        indent=4,
        cls=JSONParameterEncoder,
    )
    tpl_text = buffer.getvalue().replace('"~', '~').replace('~"', '~')

    with open(json_filename, "w") as ofp:
        ofp.write('ptf ~\n')
        ofp.write(tpl_text)

## We will be running on each cutout eventually, but start with one

In [3]:
wkdir = pl('../NHM_extractions/20230110_pois_haj/09112500/')

## Read in the legacy (old-style) parameter file as nested dictionaries, i.e. a JSON-style file. This file would need to be written as a .txt file like the original myparam.param, unless Joe changes the pyWatershed code to read in the JSON-style file instead. Check with Mike/Joe.
## Eddie and Andy changed the path for this read to the "starting values" that Parker gave us for each extraction.

In [4]:
wkdir

PosixPath('../NHM_extractions/20230110_pois_haj/09112500')

In [5]:
####This was made in a previous notebook now
# pardat = pywatershed.parameters.PrmsParameters.load(wkdir / "myparam.param")#load parameter file from extraction
# pardat.parameters_to_json(wkdir /"parameters.json")


## The upper portion that makes the JSON file and the lower blocks that run the model should now be completed prior to the output notebook, or in it.

In [6]:
pardat = pywatershed.parameters.PrmsParameters.load_from_json(wkdir / "parameters.json")#load parameter file from extraction
pars = pardat.parameters
#pars

In [7]:
dims = pardat.dimensions
dims

{'nhru': 10,
 'nsegment': 5,
 'nssr': 10,
 'ngw': 10,
 'npoigages': 2,
 'nobs': 2,
 'ndeplval': 22,
 'ndepl': 2,
 'nmonth': 12,
 'ndoy': 366,
 'scalar': 1}

In [8]:
#Lets check for the pars needed to run NHM model using pyWatershed

In [9]:
# pywatershed process classes whose parameter requirements are collected below.
nhm_processes = [
    pywatershed.PRMSSolarGeometry,
    pywatershed.PRMSAtmosphere,
    pywatershed.PRMSCanopy,
    pywatershed.PRMSSnow,
    pywatershed.PRMSRunoff,
    pywatershed.PRMSSoilzone,
    pywatershed.PRMSGroundwater,
    pywatershed.PRMSChannel,
]

# Flatten each process's parameter names into one list, in process order.
# Names needed by several processes appear more than once.
nhm_params = [
    par_name
    for process in nhm_processes
    for par_name in process.get_parameters()
]

In [10]:
nhm_params

['doy',
 'hru_slope',
 'radj_sppt',
 'radj_wppt',
 'hru_lat',
 'hru_area',
 'hru_aspect',
 'doy',
 'radadj_intcp',
 'radadj_slope',
 'tmax_index',
 'dday_slope',
 'dday_intcp',
 'radmax',
 'ppt_rad_adj',
 'tmax_allsnow',
 'tmax_allrain_offset',
 'hru_slope',
 'radj_sppt',
 'radj_wppt',
 'hru_lat',
 'hru_area',
 'hru_aspect',
 'jh_coef',
 'jh_coef_hru',
 'tmax_cbh_adj',
 'tmin_cbh_adj',
 'tmax_allsnow',
 'tmax_allrain_offset',
 'snow_cbh_adj',
 'rain_cbh_adj',
 'adjmix_rain',
 'transp_beg',
 'transp_end',
 'transp_tmax',
 'radadj_intcp',
 'radadj_slope',
 'tmax_index',
 'dday_slope',
 'dday_intcp',
 'radmax',
 'ppt_rad_adj',
 'tmax_allsnow',
 'tmax_allrain_offset',
 'hru_slope',
 'radj_sppt',
 'radj_wppt',
 'hru_lat',
 'hru_area',
 'temp_units',
 'cov_type',
 'covden_sum',
 'covden_win',
 'srain_intcp',
 'wrain_intcp',
 'snow_intcp',
 'potet_sublim',
 'doy',
 'cov_type',
 'covden_win',
 'covden_sum',
 'hru_type',
 'albset_rna',
 'albset_rnm',
 'albset_sna',
 'albset_snm',
 'den_init',
 

#### View the keys for "pars" --These will be the parameters with values listed in the "myparam_starting_vals.param" file of the extraction.

In [11]:
pars.keys()

dict_keys(['doy', 'nhm_id', 'nhm_seg', 'poi_gage_id', 'K_coef', 'adjmix_rain', 'albedo', 'albset_rna', 'albset_rnm', 'albset_sna', 'albset_snm', 'alte', 'altw', 'azrh', 'carea_max', 'cecn_coef', 'cov_type', 'covden_sum', 'covden_win', 'dday_intcp', 'dday_slope', 'den_init', 'den_max', 'dprst_depth_avg', 'dprst_et_coef', 'dprst_flow_coef', 'dprst_frac', 'dprst_frac_init', 'dprst_frac_open', 'dprst_seep_rate_clos', 'dprst_seep_rate_open', 'elev_units', 'emis_noppt', 'epan_coef', 'fastcoef_lin', 'fastcoef_sq', 'freeh2o_cap', 'gw_tau', 'gwflow_coef', 'gwsink_coef', 'gwstor_init', 'gwstor_min', 'hru_area', 'hru_aspect', 'hru_deplcrv', 'hru_elev', 'hru_lat', 'hru_lon', 'hru_percent_imperv', 'hru_segment', 'hru_segment_nhm', 'hru_slope', 'hru_type', 'imperv_stor_max', 'jh_coef', 'jh_coef_hru', 'lat_temp_adj', 'mann_n', 'maxiter_sntemp', 'melt_force', 'melt_look', 'melt_temp', 'obsin_segment', 'obsout_segment', 'op_flow_thres', 'outlet_sta', 'poi_gage_segment', 'poi_type', 'potet_sublim', 'ppt

#### View values of one parameter

In [12]:
pars['nhm_id']

array([84012, 84017, 84023, 84032, 84038, 84124, 84148, 84165, 85114,
       85116])

## Create a PEST template file version of json-style of myparam_starting_vals.param, "pars"

In [13]:
# Make a list of hrus from "pars"
hrus = pars['nhm_id']
hrus

array([84012, 84017, 84023, 84032, 84038, 84124, 84148, 84165, 85114,
       85116])

In [14]:
segs = pars['nhm_seg']
segs

array([42949, 42951, 42952, 42953, 42954])

### Run through all the currently-defined parameters and, using Mike's function "pars_to_tpl_entries()", write the parameter starting values to the dataframe "par_starting_vals"

In [15]:
# Create empty dataframe with columns parname (pestpp param name) and parval1 (pestpp starting value)
par_starting_vals = pd.DataFrame(columns=['parname','parval1', 'parubnd','parlbnd'])

In [16]:
par_starting_vals

Unnamed: 0,parname,parval1,parubnd,parlbnd


In [17]:
# Parameters to expose to PEST, in the order their entries are appended to
# `par_starting_vals`.  Each row is (parameter name, hru_based, seg_based,
# month_based); the flags are passed straight through to pars_to_tpl_entries().
tpl_par_specs = [
    ('adjmix_rain',         True,  False, True),
    ('carea_max',           True,  False, False),
    ('cecn_coef',           True,  False, True),
    ('emis_noppt',          True,  False, False),
    ('fastcoef_lin',        True,  False, False),
    ('freeh2o_cap',         True,  False, False),
    ('gwflow_coef',         True,  False, False),
    ('jh_coef',             True,  False, True),
    ('mann_n',              False, True,  False),
    ('potet_sublim',        True,  False, False),
    ('rad_trncf',           True,  False, False),
    ('radmax',              True,  False, True),
    ('rain_cbh_adj',        True,  False, True),
    ('slowcoef_sq',         True,  False, False),
    ('smidx_coef',          True,  False, False),
    ('smidx_exp',           True,  False, False),
    ('snarea_thresh',       True,  False, False),
    ('snowinfil_max',       True,  False, False),
    ('snow_cbh_adj',        True,  False, True),
    ('soil2gw_max',         True,  False, False),
    ('soil_moist_max',      True,  False, False),
    ('soil_rechr_max_frac', True,  False, False),
    ('ssr2gw_exp',          True,  False, False),
    ('ssr2gw_rate',         True,  False, False),
    ('tmax_allrain_offset', True,  False, True),
    ('tmax_allsnow',        True,  False, True),
    ('tmax_cbh_adj',        True,  False, True),
    ('tmin_cbh_adj',        True,  False, True),
]

# One call per parameter; each call returns the updated dataframe, which is
# fed into the next call.
for par_name, is_hru_based, is_seg_based, is_month_based in tpl_par_specs:
    par_starting_vals = pars_to_tpl_entries(
        pars, par_name, hrus, segs, par_starting_vals,
        hru_based=is_hru_based,
        seg_based=is_seg_based,
        month_based=is_month_based,
    )




In [18]:
par_starting_vals

Unnamed: 0,parname,parval1,parubnd,parlbnd
0,adjmix_rain:hru_84012:mon_1,1.0,,
1,adjmix_rain:hru_84017:mon_1,1.0,,
2,adjmix_rain:hru_84023:mon_1,1.0,,
3,adjmix_rain:hru_84032:mon_1,1.0,,
4,adjmix_rain:hru_84038:mon_1,1.0,,
...,...,...,...,...
115,tmin_cbh_adj:hru_84124:mon_12,0.0,,
116,tmin_cbh_adj:hru_84148:mon_12,0.0,,
117,tmin_cbh_adj:hru_84165:mon_12,0.0,,
118,tmin_cbh_adj:hru_85114:mon_12,0.0,,


In [19]:
par_starting_vals.set_index('parname', inplace =True, drop = False)
par_starting_vals
xx = par_starting_vals.loc[par_starting_vals.parname.str.startswith('carea_max'), :]
xx

Unnamed: 0_level_0,parname,parval1,parubnd,parlbnd
parname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
carea_max:hru_84012,carea_max:hru_84012,0.2294,,
carea_max:hru_84017,carea_max:hru_84017,0.771654,,
carea_max:hru_84023,carea_max:hru_84023,0.236148,,
carea_max:hru_84032,carea_max:hru_84032,0.653488,,
carea_max:hru_84038,carea_max:hru_84038,0.171317,,
carea_max:hru_84124,carea_max:hru_84124,0.114285,,
carea_max:hru_84148,carea_max:hru_84148,0.143949,,
carea_max:hru_84165,carea_max:hru_84165,0.183103,,
carea_max:hru_85114,carea_max:hru_85114,0.143847,,
carea_max:hru_85116,carea_max:hru_85116,0.175056,,


In [20]:
# need a loop here

## Setting bounds for parameters
### There were three ways to set parameter bounds in the NHM calibration:
#### 1) "not used": in the by-HRU calibration, all HRU values for this type were grouped and moved as a group within the full calibration range for the parameter.
#### 2) "range": calibrated by HRU, so each value moves independently within the calibration range in table 3.
#### 3) "percent": calibrated by HRU, but only allowed a range of +/- 20% of the starting value.

In [21]:
bnds_path = '../Supporting_information/par_cal_bounds_use.csv'
bnds = pd.read_csv(bnds_path) # Creates a data frame of the bounds for par catagories
bnds.set_index('parameter_name', inplace =True, drop = False)

In [22]:
bnds.sample(15)

Unnamed: 0_level_0,parameter_name,no_use_if_no_snow,par_lower_bound,par_upper_bound,par_default_val,HRU_cal_method,Part2_step,frac_diff_from_lbnd,default_par_start_val,new_cal_method,value_type,old_HRU_cal_method
parameter_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
snarea_thresh,snarea_thresh,,0.0,50.0,1.0,Percent,Not used,1.0,0.95,"Percent, start val",variable,Percent
gwflow_coef,gwflow_coef,,0.001,0.5,0.015,Percent,3,0.141414,0.015,"Percent, start_val",variable,Percent
radmax,radmax,,0.5,1.0,0.8,Range,3,0.6,0.8,Range,uniform,Range
cecn_coef,cecn_coef,,4.5,5.5,5.0,Range,3,0.5,5.0,"Percent, from table",uniform,Not used
smidx_exp,smidx_exp,,0.0,5.0,0.3,Percent,Not used,0.0,0.0,"Percent, start val",variable,Not used
tmax_allrain_offset,tmax_allrain_offset,1.0,0.0,10.0,1.0,Percent,4,0.1,1.0,"Percent, start val",variable,Percent
freeh2o_cap,freeh2o_cap,,0.01,0.1,0.05,Range,4,0.444444,0.05,Range,uniform,Range
emis_noppt,emis_noppt,1.0,0.757,1.0,0.757,Range,4,0.0,0.7813,"Percent, from table",uniform,Not used
jh_coef,jh_coef,,0.0,1.5,0.014,Percent,Not used,0.07,0.02,"Percent, start_val",variable,Percent
ssr2gw_rate,ssr2gw_rate,,0.01,0.8,0.1,Percent,4,0.113924,0.1,"Percent, start val",variable,Not used


In [23]:
##Create the lists of parameters for the calibration methods used
percent_list = bnds.loc[bnds.HRU_cal_method == 'Percent','parameter_name'].reset_index(drop = True)
range_list = bnds.loc[bnds.HRU_cal_method == 'Range','parameter_name']#.to_list() Note, all values are uniform starting values populated from the table 'par_vale_use.csv'
not_used_list = bnds.loc[bnds.HRU_cal_method == 'Not used','parameter_name']#.to_list()
print(not_used_list)

Series([], Name: parameter_name, dtype: object)


In [24]:
#We will rewrite these using the cn, _ and .loc method edicimated to us by 'the great one.'
# PEST parameter names as strings; matching below is by name prefix, exactly as
# before (a PEST name belongs to a PRMS parameter when it starts with that
# parameter's name).
# NOTE(review): prefix matching also catches any longer PRMS name that shares
# the same leading text; confirm no such pair exists in the bounds table.
pst_parnames = par_starting_vals['parname'].astype(str)

# "Percent" parameters: bounds are the starting value +/- 20% of its magnitude.
# Using the absolute value keeps parubnd >= parlbnd for negative starting
# values.  A starting value of exactly 0 still yields parubnd == parlbnd == 0.
for prms_parname in percent_list:
    is_match = pst_parnames.str.startswith(prms_parname).to_numpy()
    start_vals = par_starting_vals.loc[is_match, 'parval1'].astype(float)
    spread = 0.2 * start_vals.abs()
    par_starting_vals.loc[is_match, 'parubnd'] = (start_vals + spread).to_numpy()
    par_starting_vals.loc[is_match, 'parlbnd'] = (start_vals - spread).to_numpy()

# "Range" parameters: bounds are taken directly from the bounds table.
for prms_parname in range_list:
    is_match = pst_parnames.str.startswith(prms_parname).to_numpy()
    par_starting_vals.loc[is_match, 'parubnd'] = bnds.loc[prms_parname, 'par_upper_bound']
    par_starting_vals.loc[is_match, 'parlbnd'] = bnds.loc[prms_parname, 'par_lower_bound']

#for idx, row in par_starting_vals.iterrows():
#    for param in not_used_list:
#        pst_parname = str(row.parname)
#        prms_parname = param
#        #print(prms_parname)
#        x = pst_parname.startswith(prms_parname)# Just a yes not response to if the pst parname starts with the root in "".
#        if x:
#            par_starting_vals.loc[pst_parname,'parubnd'] = (0.2*par_starting_vals.loc[pst_parname, 'parval1'])+par_starting_vals.loc[pst_parname, 'parval1']
#            par_starting_vals.loc[pst_parname,'parlbnd'] = par_starting_vals.loc[pst_parname, 'parval1']-(0.2*par_starting_vals.loc[pst_parname, 'parval1'])
#             

In [25]:
#xx = par_starting_vals.loc[par_starting_vals.parname.str.startswith('adjmix_rain'), :]
#xx

### once we have all the parameter arrays replaced by names, we can write out the template file

In [26]:
write_to_json_tpl(dims, pars, wkdir / 'parameters.json.tpl')
par_starting_vals.to_csv(wkdir / 'starting_par_vals.dat', index=None, sep=' ')

### Map observation name to the Instruction File (.ins)

In [27]:
# Read the whitespace-delimited observation table.  sep=r'\s+' is the
# equivalent, non-deprecated spelling of delim_whitespace=True.
obsvals = pd.read_csv(wkdir / 'allobs.dat', sep=r'\s+')
# Index by observation name while keeping 'obsname' as a regular column too.
obsvals = obsvals.set_index('obsname', drop=False)
#obsvals.sample(5)
print(obsvals)
print('The values for "obsval" are the true observation values.')

                                                         obsname      obsval
obsname                                                                     
l_max_actet_mon:2000_1:84012        l_max_actet_mon:2000_1:84012    0.025500
l_max_actet_mon:2000_1:84017        l_max_actet_mon:2000_1:84017    0.024600
l_max_actet_mon:2000_1:84023        l_max_actet_mon:2000_1:84023    0.025800
l_max_actet_mon:2000_1:84032        l_max_actet_mon:2000_1:84032    0.023400
l_max_actet_mon:2000_1:84038        l_max_actet_mon:2000_1:84038    0.027500
...                                                          ...         ...
streamflow_mean_mon:8:09112500    streamflow_mean_mon:8:09112500  169.481232
streamflow_mean_mon:9:09112500    streamflow_mean_mon:9:09112500  111.335457
streamflow_mean_mon:10:09112500  streamflow_mean_mon:10:09112500  112.024628
streamflow_mean_mon:11:09112500  streamflow_mean_mon:11:09112500   95.384842
streamflow_mean_mon:12:09112500  streamflow_mean_mon:12:09112500   78.603813

In [28]:
# Write the instruction file: two header lines, then one instruction line per
# observation name, in the order the names appear in `obsvals`.
with open(wkdir /'modelobs.dat.ins', 'w') as ofp:
    ofp.write('pif ~\n')
    ofp.write('~obsval~\n')
    ofp.writelines(f'l1 w !{obsname}!\n' for obsname in obsvals.obsname)

### create PST control file object with `pyemu`

In [29]:
pst = pyemu.Pst.from_io_files(tpl_files = [str(wkdir / 'parameters.json.tpl')],
                              in_files=[str(wkdir / 'parameters.json')],# Values for parval1 and bnds will be populated with default values
                              ins_files = [str(wkdir / 'modelobs.dat.ins')],
                              out_files = [str(wkdir / 'modelobs.dat')], #names the model output file in the control file (prior_mc.pst)--Chk with Mike
                              pst_path = '.')
#Ask Mike if pyemu reads in the values for the obs from the modelobs.dat file

error parsing metadata from 'obsnme', continuing


## Direct editing of the PEST parameter file

## Starting parameter values
### Starting values were set from the initial parameter file used, in our case it was the "pre-calibration" values given to us by Parker. SO! No changes to those values, but we will need to customize the upper and lower bounds!

In [30]:
pars = pst.parameter_data
#pars

In [31]:
#pars

In [32]:
#pars.loc['adjmix_rain:hru_5621:mon_1','parval1'] = 987236
#pst.parameter_data


### Copy parval1, upper bound and lower bound from "par_starting_vals" to pars.parval1 

In [33]:
# NOTE: `pars` here is pst.parameter_data (rebound in an earlier cell), not the
# parameter dictionary loaded from parameters.json.
# Copy the starting value and both bounds for every control-file parameter from
# `par_starting_vals` (indexed by parameter name).  Each assignment already
# covers the whole frame, so it only needs to run once per column.
for column in ('parval1', 'parubnd', 'parlbnd'):
    pars.loc[pars.parnme, column] = par_starting_vals.loc[pars.parnme, column]



In [34]:
pars.sample(30)

Unnamed: 0,parnme,partrans,parchglim,parval1,parlbnd,parubnd,pargp,scale,offset,dercom,...,sublim,trncf,radmax,adj,sq,exp,thresh,frac,rate,allsnow
tmin_cbh_adj:hru_84012:mon_3,tmin_cbh_adj:hru_84012:mon_3,log,factor,0.0,-3.0,3.0,pargp,1.0,0.0,1,...,,,,hru,,,,,,
tmax_allsnow:hru_84023:mon_9,tmax_allsnow:hru_84023:mon_9,log,factor,29.313791,23.451033,35.176549,pargp,1.0,0.0,1,...,,,,,,,,,,hru
snow_cbh_adj:hru_84023:mon_12,snow_cbh_adj:hru_84023:mon_12,log,factor,1.0,0.5,1.75,pargp,1.0,0.0,1,...,,,,hru,,,,,,
snow_cbh_adj:hru_84165:mon_6,snow_cbh_adj:hru_84165:mon_6,log,factor,1.0,0.5,1.75,pargp,1.0,0.0,1,...,,,,hru,,,,,,
tmax_allrain_offset:hru_84032:mon_1,tmax_allrain_offset:hru_84032:mon_1,log,factor,7.649709,6.119767,9.179651,pargp,1.0,0.0,1,...,,,,,,,,,,
adjmix_rain:hru_84165:mon_9,adjmix_rain:hru_84165:mon_9,log,factor,1.0,0.6,1.4,pargp,1.0,0.0,1,...,,,,,,,,,,
tmax_allsnow:hru_84023:mon_11,tmax_allsnow:hru_84023:mon_11,log,factor,28.426603,22.741282,34.111924,pargp,1.0,0.0,1,...,,,,,,,,,,hru
tmax_allrain_offset:hru_84165:mon_6,tmax_allrain_offset:hru_84165:mon_6,log,factor,7.046521,5.637217,8.455825,pargp,1.0,0.0,1,...,,,,,,,,,,
snow_cbh_adj:hru_84124:mon_10,snow_cbh_adj:hru_84124:mon_10,log,factor,1.0,0.5,1.75,pargp,1.0,0.0,1,...,,,,hru,,,,,,
tmax_allsnow:hru_85114:mon_9,tmax_allsnow:hru_85114:mon_9,log,factor,30.681358,24.545086,36.81763,pargp,1.0,0.0,1,...,,,,,,,,,,hru


### Copy upper and lower bounds from par_cal_bounds_use.csv to par.parubnd and par.parlbnd
### AND...overwrite parval1 with new starting values determined from default values listed in PRMS table 5.2.1 (published), https://water.usgs.gov/water-resources/software/PRMS/ -- Check with Jacob and make sure these agree with what they used in the cal script. NO, we are not doing this anymore!

In [35]:
prms_parnme_list =bnds['parameter_name']# Make a list of the nhm par names for loops below
#print(prms_parnme_list)

In [36]:
#We can delete this because we replaced this assignment above
#for idx, row in pars.iterrows():
#    for i in prms_parnme_list:
#        pst_parnme = str(row.parnme)
#        prms_parnme = prms_parnme_list[i]
#        x = pst_parnme.startswith(prms_parnme)# Just a yes not response to if the pst parname starts with the root in "".
#        if x :
#            pars.loc[pst_parnme,'parubnd'] = bnds.loc[prms_parnme,'par_upper_bound']
#            pars.loc[pst_parnme,'parlbnd'] = bnds.loc[prms_parnme,'par_lower_bound']
#            #pars.loc[pst_parnme,'parval1'] = bnds.loc[prms_parnme,'par_start_val'] remove

### we can't log transform negative parameter values

In [37]:
pars.loc[pars.parlbnd<=0, 'partrans'] = 'none'

In [38]:
### obs.loc[obsvals.obsname, 'obsval'] = obsvals.obsval.values

#### Set obsval in the "pst.observation_data" frame back to the true observation value

In [39]:
obs = pst.observation_data #This pulls the "observation data" from the pst dataframe and sets it to the "obs" object (dataframe)

In [40]:
obs.loc[obs.obsnme, :]

Unnamed: 0,obsnme,obsval,weight,obgnme
g_min_actet_mean_mon:10:84012,g_min_actet_mean_mon:10:84012,0.048512,1.0,obgnme
g_min_actet_mean_mon:10:84017,g_min_actet_mean_mon:10:84017,0.011868,1.0,obgnme
g_min_actet_mean_mon:10:84023,g_min_actet_mean_mon:10:84023,0.049004,1.0,obgnme
g_min_actet_mean_mon:10:84032,g_min_actet_mean_mon:10:84032,0.018778,1.0,obgnme
g_min_actet_mean_mon:10:84038,g_min_actet_mean_mon:10:84038,0.052744,1.0,obgnme
...,...,...,...,...
streamflow_mon:2010_7:09112500,streamflow_mon:2010_7:09112500,139.133403,1.0,obgnme
streamflow_mon:2010_8:09112200,streamflow_mon:2010_8:09112200,10.742105,1.0,obgnme
streamflow_mon:2010_8:09112500,streamflow_mon:2010_8:09112500,95.236562,1.0,obgnme
streamflow_mon:2010_9:09112200,streamflow_mon:2010_9:09112200,7.415596,1.0,obgnme


In [41]:
obs.loc[obs.obsnme =='actet_mon:2000_1:5621',:] # This is the value in the modelobs.dat file?

Unnamed: 0,obsnme,obsval,weight,obgnme


In [42]:
obsvals.loc[obsvals.obsname =='actet_mon:2000_1:5621',:]

Unnamed: 0_level_0,obsname,obsval
obsname,Unnamed: 1_level_1,Unnamed: 2_level_1


In [43]:
#obs = obs.loc[obsvals.obsname,:] #resorts datframe for easy in reading

In [44]:
obs.loc[obsvals.obsname, 'obsval'] = obsvals.obsval.values #True observation value is copied over to obs
obs

Unnamed: 0,obsnme,obsval,weight,obgnme
g_min_actet_mean_mon:10:84012,g_min_actet_mean_mon:10:84012,0.003545,1.0,obgnme
g_min_actet_mean_mon:10:84017,g_min_actet_mean_mon:10:84017,0.009364,1.0,obgnme
g_min_actet_mean_mon:10:84023,g_min_actet_mean_mon:10:84023,0.002491,1.0,obgnme
g_min_actet_mean_mon:10:84032,g_min_actet_mean_mon:10:84032,0.011273,1.0,obgnme
g_min_actet_mean_mon:10:84038,g_min_actet_mean_mon:10:84038,0.014191,1.0,obgnme
...,...,...,...,...
streamflow_mon:2010_7:09112500,streamflow_mon:2010_7:09112500,247.709671,1.0,obgnme
streamflow_mon:2010_8:09112200,streamflow_mon:2010_8:09112200,153.554840,1.0,obgnme
streamflow_mon:2010_8:09112500,streamflow_mon:2010_8:09112500,178.451614,1.0,obgnme
streamflow_mon:2010_9:09112200,streamflow_mon:2010_9:09112200,74.519997,1.0,obgnme


In [45]:
obs.loc[obs.obsnme =='actet_mon:2000_1:5621',:] # Check for change

Unnamed: 0,obsnme,obsval,weight,obgnme


#### Creating Groups observations

In [46]:
# Observation groups whose group name is identical to the observation-name
# prefix (upper/lower inequality targets for each data type).
same_name_groups = [
    'l_max_actet_mon', 'g_min_actet_mon',
    'l_max_actet_mean_mon', 'g_min_actet_mean_mon',
    'l_max_recharge_ann', 'g_min_recharge_ann',
    'l_max_soil_moist_mon', 'g_min_soil_moist_mon',
    'l_max_soil_moist_ann', 'g_min_soil_moist_ann',
    'l_max_runoff_mon', 'g_min_runoff_mon',
    'l_max_sca_daily', 'g_min_sca_daily',
]
for group_name in same_name_groups:
    obs.loc[obs.obsnme.str.startswith(group_name), 'obgnme'] = group_name

# Create EFC Groups for daily streamflows
# streamflow_daily is followed by a suffix: "efc"_"high_low" integers
# efc [1, 2, 3, 4, 5] are ['Large flood', 'Small flood', 'High flow pulse', 'Low flow', 'Extreme low flow']
# high_low [1, 2, 3] are ['Low flow', 'Ascending limb', 'Descending limb']
# Pest++ group names were written with flows in mind.
efc_groups = {
    'streamflow_daily_1_2': 'streamflow_daily_large_asc',
    'streamflow_daily_1_3': 'streamflow_daily_large_dsc',
    'streamflow_daily_2_2': 'streamflow_daily_small_asc',
    'streamflow_daily_2_3': 'streamflow_daily_small_dsc',
    'streamflow_daily_3_2': 'streamflow_daily_pulse_asc',
    'streamflow_daily_3_3': 'streamflow_daily_pulse_dsc',
    'streamflow_daily_4_1': 'streamflow_daily_low',
    'streamflow_daily_5_1': 'streamflow_daily_exlow',
}
for name_prefix, group_name in efc_groups.items():
    obs.loc[obs.obsnme.str.startswith(name_prefix), 'obgnme'] = group_name

#Special group for no flow
obs.loc[obs.obsnme.str.startswith('streamflow_daily_-9999_-9999'), 'obgnme'] = 'streamflow_nodata'
# Any daily streamflow observation whose value is the -9999 flag is also moved
# to the no-data group, overriding the EFC group assigned above.
is_daily_nodata = obs.obsnme.str.startswith('streamflow_daily') & (obs.obsval == -9999)
obs.loc[is_daily_nodata, 'obgnme'] = 'streamflow_nodata'

# Monthly streamflow groups, again named after their prefix.
for group_name in ('streamflow_mon', 'streamflow_mean_mon'):
    obs.loc[obs.obsnme.str.startswith(group_name), 'obgnme'] = group_name
obs.sample(30)

Unnamed: 0,obsnme,obsval,weight,obgnme
l_max_sca_daily:2002_5_17:85116,l_max_sca_daily:2002_5_17:85116,0.229,1.0,l_max_sca_daily
l_max_soil_moist_mon:2006_5:84165,l_max_soil_moist_mon:2006_5:84165,0.79528,1.0,l_max_soil_moist_mon
l_max_runoff_mon:1990_9:85116,l_max_runoff_mon:1990_9:85116,13.8137,1.0,l_max_runoff_mon
g_min_sca_daily:2001_9_29:84032,g_min_sca_daily:2001_9_29:84032,-9999.0,1.0,g_min_sca_daily
g_min_sca_daily:2002_4_15:84023,g_min_sca_daily:2002_4_15:84023,-9999.0,1.0,g_min_sca_daily
l_max_sca_daily:2007_6_11:84017,l_max_sca_daily:2007_6_11:84017,0.0,1.0,l_max_sca_daily
l_max_sca_daily:2009_11_20:85116,l_max_sca_daily:2009_11_20:85116,0.961493,1.0,l_max_sca_daily
g_min_sca_daily:2008_9_25:84038,g_min_sca_daily:2008_9_25:84038,0.0,1.0,g_min_sca_daily
l_max_sca_daily:2008_9_11:85116,l_max_sca_daily:2008_9_11:85116,-9999.0,1.0,l_max_sca_daily
l_max_sca_daily:2000_6_18:84023,l_max_sca_daily:2000_6_18:84023,-9999.0,1.0,l_max_sca_daily


In [47]:
obs['obgnme'].unique()

array(['g_min_actet_mean_mon', 'g_min_actet_mon', 'g_min_recharge_ann',
       'g_min_runoff_mon', 'g_min_sca_daily', 'g_min_soil_moist_ann',
       'g_min_soil_moist_mon', 'l_max_actet_mean_mon', 'l_max_actet_mon',
       'l_max_recharge_ann', 'l_max_runoff_mon', 'l_max_sca_daily',
       'l_max_soil_moist_ann', 'l_max_soil_moist_mon',
       'streamflow_daily_pulse_asc', 'streamflow_daily_pulse_dsc',
       'streamflow_daily_low', 'streamflow_daily_exlow',
       'streamflow_mean_mon', 'streamflow_mon'], dtype=object)

In [48]:


#Set weights for groups"
## TODO: Assign weights for all but streamflow that make sense as 1/std

###Need to tailor these wts individually to the STDV values that we assume are "good."

# obs.loc[obs.obgnme=='l_max_actet_mean_mon','weight'] = 3.0E+04
# obs.loc[obs.obgnme=='g_min_actet_mean_mon','weight'] = 3.0E+04

# obs.loc[obs.obgnme=='l_max_actet_mon','weight'] = 0.75E+04
# obs.loc[obs.obgnme=='g_min_actet_mon','weight'] = 0.75E+04

# obs.loc[obs.obgnme=='l_max_recharge_ann','weight'] = 0.4E+04
# obs.loc[obs.obgnme=='g_min_recharge_ann','weight'] = 0.4E+04

# obs.loc[obs.obgnme=='l_max_soil_moist_ann','weight'] = 2.5E+03
# obs.loc[obs.obgnme=='g_min_soil_moist_ann','weight'] = 2.5E+03

# obs.loc[obs.obgnme=='l_max_soil_moist_mon','weight'] = 8E+02
# obs.loc[obs.obgnme=='g_min_soil_moist_mon','weight'] = 8E+02


# obs.loc[obs.obgnme=='l_max_sca_daily','weight'] = 0 #3E-03
# obs.loc[obs.obgnme=='g_min_sca_daily','weight'] = 0 #3E-03

# obs.loc[obs.obgnme=='l_max_runoff_mon','weight'] = 3.5
# obs.loc[obs.obgnme=='g_min_runoff_mon','weight'] = 3.5



# obs.loc[obs.obgnme.str.startswith('streamflow'), 'weight'] = \
#     10 / obs.loc[obs.obgnme.str.startswith('streamflow'),'obsval']
# obs.loc[obs.obgnme=='streamflow_nodata','weight'] = 0

# # special case for streamflow with 0 observed value
# obs.loc[(obs.obsval<=1) & (obs.obgnme.str.startswith('stream')), 'weight'] = 1.0



In [49]:
obs.loc[(obs.obsval<=1) & (obs.obgnme.str.startswith('stream'))]

Unnamed: 0,obsnme,obsval,weight,obgnme


In [50]:
#obs.loc[obs.obgnme.str.startswith('streamflow')

## now we flip these weights back to standard deviation for the noise ensemble and then do not revisit STD, although we will adjust weights to rebalance PHI--Retooled

In [51]:
#obs.loc[:,'standard_deviation'] = [1/w if w!=0 else 1e-6 for w in obs.weight]

## Set SD and bounds for obs from file "Observation_standard_deviation.csv" in Supporting Information folder; if you want to change bounds and SD, change values in the .csv file. Primarily to make sure values during the prior don't go negative.

In [52]:
obs_sdbnds_path = '../Supporting_information/Observation_standard_deviation.csv'
# One row per observation group; 'obsgroup' becomes the index and is also kept as a column.
obs_sdbnds = pd.read_csv(obs_sdbnds_path).set_index('obsgroup', drop=False)

In [53]:
# Display the per-group lookup table read from the CSV.
obs_sdbnds

Unnamed: 0_level_0,obsgroup,noise_percent,units,obslbnd,obsubnd,wt_percent
obsgroup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
l_max_actet_mean_mon,l_max_actet_mean_mon,0.001,in/day,0,100000000000.0,0.2
g_min_actet_mean_mon,g_min_actet_mean_mon,0.001,in/day,0,100000000000.0,0.2
l_max_actet_mon,l_max_actet_mon,0.001,in/day,0,100000000000.0,0.2
g_min_actet_mon,g_min_actet_mon,0.001,in/day,0,100000000000.0,0.2
l_max_recharge_ann,l_max_recharge_ann,0.001,dimensionless,0,1.0,0.25
g_min_recharge_ann,g_min_recharge_ann,0.001,dimensionless,0,1.0,0.25
l_max_runoff_mon,l_max_runoff_mon,0.001,cfs,0,100000000000.0,0.1
g_min_runoff_mon,g_min_runoff_mon,0.001,cfs,0,100000000000.0,0.1
l_max_sca_daily,l_max_sca_daily,0.001,dimensionless,0,1.0,0.15
g_min_sca_daily,g_min_sca_daily,0.001,dimensionless,0,1.0,0.15


In [54]:
# Strip leading/trailing whitespace (spaces, \n, ...) from every index label.
obs_sdbnds.index = list(map(str.strip, obs_sdbnds.index))

In [55]:
# Display the distinct index labels of the lookup table.
obs_sdbnds.index.unique()

Index(['l_max_actet_mean_mon', 'g_min_actet_mean_mon', 'l_max_actet_mon',
       'g_min_actet_mon', 'l_max_recharge_ann', 'g_min_recharge_ann',
       'l_max_runoff_mon', 'g_min_runoff_mon', 'l_max_sca_daily',
       'g_min_sca_daily', 'l_max_soil_moist_ann', 'g_min_soil_moist_ann',
       'l_max_soil_moist_mon', 'g_min_soil_moist_mon',
       'streamflow_daily_large_asc', 'streamflow_daily_large_dsc',
       'streamflow_daily_small_asc', 'streamflow_daily_small_dsc',
       'streamflow_daily_pulse_asc', 'streamflow_daily_pulse_dsc',
       'streamflow_daily_low', 'streamflow_daily_exlow', 'streamflow_mean_mon',
       'streamflow_mon', 'streamflow_nodata'],
      dtype='object')

In [56]:
# Seed the bound and noise columns in place on obs: lower_bound starts at 0,
# upper_bound and standard_deviation start as NaN.
for new_column, initial_value in (
    ('lower_bound', 0),
    ('upper_bound', np.nan),
    ('standard_deviation', np.nan),
):
    obs[new_column] = initial_value
# 'weight' is left untouched here.

In [57]:
# Pull the 'obsgroup' column out of the lookup table (a Series, despite the name) and display it.
# NOTE(review): an earlier cell strips the index labels only; the values in this column
# may still carry padding — confirm before matching on them.
obsgroup_list = obs_sdbnds['obsgroup']
obsgroup_list

l_max_actet_mean_mon          l_max_actet_mean_mon       
g_min_actet_mean_mon          g_min_actet_mean_mon       
l_max_actet_mon               l_max_actet_mon            
g_min_actet_mon               g_min_actet_mon            
l_max_recharge_ann            l_max_recharge_ann         
g_min_recharge_ann            g_min_recharge_ann         
l_max_runoff_mon                     l_max_runoff_mon    
g_min_runoff_mon                     g_min_runoff_mon    
l_max_sca_daily               l_max_sca_daily            
g_min_sca_daily               g_min_sca_daily            
l_max_soil_moist_ann          l_max_soil_moist_ann       
g_min_soil_moist_ann          g_min_soil_moist_ann       
l_max_soil_moist_mon          l_max_soil_moist_mon       
g_min_soil_moist_mon          g_min_soil_moist_mon       
streamflow_daily_large_asc    streamflow_daily_large_asc 
streamflow_daily_large_dsc    streamflow_daily_large_dsc 
streamflow_daily_small_asc    streamflow_daily_small_asc 
streamflow_dai

In [58]:
# Display the column names available in the lookup table.
obs_sdbnds.columns

Index(['obsgroup', 'noise_percent', 'units', 'obslbnd', 'obsubnd',
       'wt_percent'],
      dtype='object')

In [59]:
# Show any observations that belong to the 'streamflow_nodata' group.
in_nodata_group = obs.obgnme.eq('streamflow_nodata')
obs[in_nodata_group]

Unnamed: 0,obsnme,obsval,weight,obgnme,lower_bound,upper_bound,standard_deviation


In [60]:
# Display the observation table with the newly added bound / standard-deviation columns.
obs

Unnamed: 0,obsnme,obsval,weight,obgnme,lower_bound,upper_bound,standard_deviation
g_min_actet_mean_mon:10:84012,g_min_actet_mean_mon:10:84012,0.003545,1.0,g_min_actet_mean_mon,0,,
g_min_actet_mean_mon:10:84017,g_min_actet_mean_mon:10:84017,0.009364,1.0,g_min_actet_mean_mon,0,,
g_min_actet_mean_mon:10:84023,g_min_actet_mean_mon:10:84023,0.002491,1.0,g_min_actet_mean_mon,0,,
g_min_actet_mean_mon:10:84032,g_min_actet_mean_mon:10:84032,0.011273,1.0,g_min_actet_mean_mon,0,,
g_min_actet_mean_mon:10:84038,g_min_actet_mean_mon:10:84038,0.014191,1.0,g_min_actet_mean_mon,0,,
...,...,...,...,...,...,...,...
streamflow_mon:2010_7:09112500,streamflow_mon:2010_7:09112500,247.709671,1.0,streamflow_mon,0,,
streamflow_mon:2010_8:09112200,streamflow_mon:2010_8:09112200,153.554840,1.0,streamflow_mon,0,,
streamflow_mon:2010_8:09112500,streamflow_mon:2010_8:09112500,178.451614,1.0,streamflow_mon,0,,
streamflow_mon:2010_9:09112200,streamflow_mon:2010_9:09112200,74.519997,1.0,streamflow_mon,0,,


In [61]:
# Copy each group's 'obsubnd' from the lookup table onto that group's observations.
for cn in obs.groupby('obgnme').groups:
    in_group = obs.obgnme == cn
    group_upper_bound = obs_sdbnds.loc[cn, 'obsubnd']
    obs.loc[in_group, 'upper_bound'] = group_upper_bound

In [62]:
# Noise standard deviation = the group's 'noise_percent' times each observed value.
for cn in obs.groupby('obgnme').groups:
    in_group = obs.obgnme == cn
    noise_fraction = obs_sdbnds.loc[cn, 'noise_percent']
    obs.loc[in_group, 'standard_deviation'] = noise_fraction * obs.loc[in_group, 'obsval']

# Rows whose obsval is the -9999 sentinel get a standard deviation of 9999 instead.
is_sentinel = obs.obsval == -9999
obs.loc[is_sentinel, 'standard_deviation'] = 9999

In [63]:
#obs.loc[obs.standard_deviation.isnull()]


In [64]:
#But, to read in the "other" SD, the SD for the value, not the noise.

In [65]:

# Derive observation weights from the 'wt_percent' column of obs_sdbnds
# (a fractional value read from the CSV):
#   * streamflow groups : weight = 1 / (wt_percent * obsval)
#   * every other group : weight = 1 / wt_percent
#     (the inequality calibration obs do NOT use the observed value).
# The weight column stays numeric: the previous trailing statement wrote the
# string 'streamflow_daily_large_asc' into it for all streamflow rows, which
# discarded the values computed here and turned the column into object dtype.
for cn, _ in obs.groupby('obgnme'):
    in_group = obs.obgnme == cn
    obs_group_percent = obs_sdbnds.loc[cn, 'wt_percent']
    if cn.startswith('streamflow_'):
        # an obsval of 0 yields an infinite weight here; the next cell resets
        # the weight of streamflow rows with obsval <= 1
        obs.loc[in_group, 'weight'] = 1 / (obs_group_percent * obs.loc[in_group, 'obsval'])
    else:
        obs.loc[in_group, 'weight'] = 1 / obs_group_percent




In [66]:
# The statements below are order-dependent: later assignments override earlier ones.

# Both snow-covered-area groups are given zero weight (a value of 3E-03 was noted previously).
sca_groups = ['l_max_sca_daily', 'g_min_sca_daily']
obs.loc[obs.obgnme.isin(sca_groups), 'weight'] = 0

# Streamflow groups: weight = 10 / observed value.
streamflow_rows = obs.obgnme.str.startswith('streamflow')
obs.loc[streamflow_rows, 'weight'] = 10 / obs.loc[streamflow_rows, 'obsval']

# The no-data streamflow group carries no weight.
obs.loc[obs.obgnme == 'streamflow_nodata', 'weight'] = 0

# Special case: 'stream*' rows with an observed value <= 1 (includes 0) get unit weight.
# NOTE(review): this also re-weights 'streamflow_nodata' rows whose obsval is <= 1
# and not -9999 — confirm that is intended.
stream_rows = obs.obgnme.str.startswith('stream')
obs.loc[stream_rows & (obs.obsval <= 1), 'weight'] = 1.0

# Finally, any row holding the -9999 sentinel is given zero weight.
obs.loc[obs.obsval == -9999, 'weight'] = 0

In [67]:
# Spot-check 20 observations; the fixed seed makes the same rows appear on every re-run.
obs.sample(20, random_state=42)

Unnamed: 0,obsnme,obsval,weight,obgnme,lower_bound,upper_bound,standard_deviation
g_min_sca_daily:2007_10_4:84165,g_min_sca_daily:2007_10_4:84165,-9999.0,0.0,g_min_sca_daily,0,1.0,9999.0
l_max_sca_daily:2000_11_23:84124,l_max_sca_daily:2000_11_23:84124,0.812381,0.0,l_max_sca_daily,0,1.0,0.000812
g_min_sca_daily:2002_3_27:85114,g_min_sca_daily:2002_3_27:85114,-9999.0,0.0,g_min_sca_daily,0,1.0,9999.0
l_max_sca_daily:2008_2_21:84032,l_max_sca_daily:2008_2_21:84032,-9999.0,0.0,l_max_sca_daily,0,1.0,9999.0
g_min_sca_daily:2010_12_8:84148,g_min_sca_daily:2010_12_8:84148,-9999.0,0.0,g_min_sca_daily,0,1.0,9999.0
g_min_sca_daily:2000_1_14:84017,g_min_sca_daily:2000_1_14:84017,-9999.0,0.0,g_min_sca_daily,0,1.0,9999.0
g_min_sca_daily:2000_6_3:85114,g_min_sca_daily:2000_6_3:85114,0.245133,0.0,g_min_sca_daily,0,1.0,0.000245
l_max_sca_daily:2004_11_22:84023,l_max_sca_daily:2004_11_22:84023,-9999.0,0.0,l_max_sca_daily,0,1.0,9999.0
l_max_sca_daily:2003_4_17:84012,l_max_sca_daily:2003_4_17:84012,-9999.0,0.0,l_max_sca_daily,0,1.0,9999.0
g_min_sca_daily:2002_5_28:85114,g_min_sca_daily:2002_5_28:85114,-9999.0,0.0,g_min_sca_daily,0,1.0,9999.0


# consolidate the run scripts into a single script
### Eddie commented out after modification of the forward_run.py file with James during debugging. Eddie will eventually fix this and bring it back in.

In [68]:
def _is_import_line(line):
    """Return True when the first token of ``line`` is exactly the keyword ``import``.

    Comparing the whole token (rather than a bare prefix) keeps statements such as
    ``importlib.reload(x)`` or ``important = 1`` from being mistaken for imports.
    ``from x import y`` lines are not matched and therefore stay in place.
    """
    return line.split()[:1] == ['import']


# Read each script exactly once, and close the file handles when done.
with open('../scripts/run-pynhm.py', 'r') as ifp:
    run_lines = ifp.readlines()
with open('../scripts/post-process_model_output.py', 'r') as ifp:
    post_lines = ifp.readlines()

# `import ...` lines from both scripts, to be written at the top of the combined script.
imports = [i.strip() for i in run_lines + post_lines if _is_import_line(i)]

# Everything else: the model run, a marker print, then the post-processing code.
runbiz = [i.rstrip() for i in run_lines if not _is_import_line(i)]
runbiz.append('print("#### RUN DONE, TIME TO POSTPROCESS ####")')
runbiz.extend(i.rstrip() for i in post_lines if not _is_import_line(i))


In [69]:
#runbiz

In [70]:
# Dedupe the imports while keeping first-seen order, so the generated
# forward_run.py is identical from run to run (a set has no stable order).
imports = list(dict.fromkeys(imports))


### now write out all the forward run stuff

In [71]:
# Write the collected import lines first, then the run and post-processing lines.
with open(wkdir / 'forward_run.py', 'w') as ofp:
    for line in imports + runbiz:
        ofp.write(f'{line}\n')

### and set the consolidated forward_run.py file to the pst object

In [72]:
# Use the consolidated forward_run.py (written into wkdir above) as the model command.
pst.model_command = ['python forward_run.py']

In [73]:
# Start with noptmax = 0; the note below says to switch to -1 later.
pst.control_data.noptmax=0 #or -1 later, 0 at first

### set some PEST++ specific parameters

In [74]:
# PEST++ options, applied one key at a time in the order listed.
pestpp_settings = {
    "ies_num_reals": 500,
    "ies_bad_phi_sigma": 2.5,
    "overdue_giveup_fac": 4,
    "ies_no_noise": False,
    "ies_drop_conflicts": False,
    "ies_pdc_sigma_distance": 3.0,
    'ies_autoadaloc': False,
    'ies_num_threads': 4,
    'ies_lambda_mults': (0.1, 1.0, 10.0, 100.0),
    'lambda_scale_fac': (0.75, 0.9, 1.0, 1.1),
    'ies_subset_size': 10,
}
for option_name, option_value in pestpp_settings.items():
    pst.pestpp_options[option_name] = option_value

# set SVD for some regularization
pst.svd_data.maxsing = 250

In [75]:
# Sanity check: at least one observation must carry a weight of exactly zero.
has_zero_weight = (pst.observation_data.weight == 0).any()
assert has_zero_weight

In [76]:
# Keep only these parameter-table columns, in this order.
par_columns = [
    'parnme', 'partrans', 'parchglim', 'parval1', 'parlbnd',
    'parubnd', 'pargp', 'scale', 'offset', 'dercom',
]
pst.parameter_data = pst.parameter_data[par_columns]

### special case for just this one value with busted bounds 

In [77]:
#pst.parameter_data.loc['smidx_exp:hru_84017']

In [78]:
# One-off override for a single parameter: set its starting value to 0.003
# and its upper bound to twice that, but only if the parameter is present.
busted_par = 'smidx_exp:hru_84017'
busted_start = 0.003
if busted_par in pst.parameter_data.index:
    pst.parameter_data.loc[busted_par, 'parval1'] = busted_start
    pst.parameter_data.loc[busted_par, 'parubnd'] = busted_start * 2
    

In [79]:
# Write the control file into the working directory.
prior_pst_path = wkdir / 'prior_mc.pst'
pst.write(str(prior_pst_path), version=2)

noptmax:0, npar_adj:1375, nnz_obs:25904


In [80]:
# List the observation groups that still have a missing weight;
# an empty array means every weight is set.
weight_missing = obs.weight.isna()
obs.loc[weight_missing, 'obgnme'].unique()

array([], dtype=object)

In [81]:
# True if any cell anywhere in the observation table is missing.
obs.isna().to_numpy().any()

False

In [82]:
# Compare the total row count with the count of rows that have no missing values.
n_obs_total = len(pst.observation_data)
n_obs_complete = len(pst.observation_data.dropna())
n_obs_total, n_obs_complete

(92624, 92624)

In [83]:
# Show the rows that dropna() would remove, i.e. those with at least one missing value.
all_obs_names = set(pst.observation_data.index)
complete_obs_names = set(pst.observation_data.dropna().index)
pst.observation_data.loc[list(all_obs_names - complete_obs_names)]

Unnamed: 0,obsnme,obsval,weight,obgnme,lower_bound,upper_bound,standard_deviation


In [84]:
# Show 'streamflow_daily_exlow' observations whose observed value is exactly 0.
in_exlow_group = pst.observation_data.obgnme == 'streamflow_daily_exlow'
is_zero_valued = pst.observation_data.obsval == 0
pst.observation_data.loc[in_exlow_group & is_zero_valued]

Unnamed: 0,obsnme,obsval,weight,obgnme,lower_bound,upper_bound,standard_deviation
