# Build the pest interface, generate the Prior, and set observation values, weights, and noise

This notebook gets us ready to run pest++

In [None]:
%matplotlib inline
import os
import shutil
import psutil
import numpy as np
import flopy
import pandas as pd
import pyemu
import helpers

In [None]:
_ = helpers.get_domain_map()

Choose your original set of model files

In [None]:
# Directory holding the original ("safe") model files; the simulation is loaded from here.
safe_org_d = os.path.join("..", "models", "synthetic-valley-base-annual-optwell")
# Alternative model set (swap the comments to use it instead):
# safe_org_d = os.path.join("..", "models", "synthetic-valley-working-advanced-annual")
# Fail early if the source directory cannot be found from the current working directory.
assert os.path.exists(safe_org_d)

Define the working directory

In [None]:
working_d = "model_and_pest_files"

Choose whether to run a prior Monte Carlo at the end of this notebook and, if so, how many workers to use

In [None]:
run_prior_mc = False
num_workers = 15

In [None]:
# Report the number of physical cores (logical=False leaves out hyperthreads).
cores = psutil.cpu_count(logical=False)
f"this computer has {cores} cores"

And choose the length of the window (in months) to use for smoothing the observed timeseries

In [None]:
window = 18  # months

Make a copy of the safe set of model files and run mf6 in that directory

In [None]:
# Scratch directory that receives the rewritten model files.
tmp_d = "temp"
# Remove anything left over from a previous run so we always start clean.
if os.path.exists(tmp_d):
    shutil.rmtree(tmp_d)

Load the simulation from the original location, and re-write it in the temp directory

In [None]:
# Load the simulation from the untouched source directory ...
sim = flopy.mf6.MFSimulation.load(sim_ws=safe_org_d)
# ... and point all subsequent writes at the temp directory.
sim.set_sim_path(tmp_d)
gwf = sim.get_model()
# Store package data in external files in the simulation folder itself
# (external_data_folder="."); those files are what gets parameterized below.
gwf.set_all_data_external(external_data_folder=".")
sim.write_simulation()

In [None]:
sim.run_simulation()

Sometimes, you can get by being a lil bit "loose and fast" with the solver settings if you don't need high-fidelity derivatives...

In [None]:
# Swap the solver package loaded from disk for one with looser settings.
sim.remove_package("ims")
ims = flopy.mf6.ModflowIms(
    sim,
    print_option="summary",
    complexity="complex",
    under_relaxation=None,
    linear_acceleration="bicgstab",
    outer_maximum=500,
    inner_maximum=100,
    # Relaxed head-change closure criteria: the "loose and fast" part.
    outer_dvclose=1e-3,
    inner_dvclose=1e-3,
)

Usually a good idea to let the model keep running even if it fails to converge

In [None]:
# NOTE(review): presumably this sets the CONTINUE option of the simulation name
# file so mf6 keeps going after a failed convergence - confirm that your flopy
# version routes this attribute to `sim.name_file.continue_`.
sim.continue_ = True

In [None]:
# Re-write the input files so the new solver/continue settings reach disk,
# then run again to make sure the model still runs with them.
sim.write_simulation()
sim.run_simulation()

Make a `PstFrom` instance.  This will be how we build up the pest interface

In [None]:
# Build the pest interface from the model files in tmp_d; the interface lives in
# working_d, which the rest of the notebook also reaches through pf.new_d.
pf = pyemu.utils.PstFrom(
    tmp_d,
    working_d,
    remove_existing=True,  # replace any working_d left by an earlier run
    spatial_reference=gwf.modelgrid,
    zero_based=False,  # NOTE(review): presumably marks cell indices in the model files as 1-based - confirm
    start_datetime=gwf.start_datetime,
    echo=False,
    chunk_len=1000000,
)

We are using a model post-processing function to clean up and process csv output files.  We need to tell `PstFrom` to run that function after mf6 runs.  Open the "helpers.py" script in the notebooks/ directory and find the "process_csv_files()" function.  What does it do?

In [None]:
# Run the post-processor once now so the processed csv files exist in the
# working directory before observations are defined from them.
helpers.process_csv_files(model_ws=pf.new_d)
# Register the same function to run after the model (is_pre_cmd=False) in every pest run.
pf.add_py_function("helpers.py", "process_csv_files()", is_pre_cmd=False)

Tell `PstFrom` to run mf6 as the "model"

In [None]:
pf.mod_sys_cmds.append("mf6")

Add the first set of model outputs as "observations" in the pest interface: "swgw-longterm-means.csv".  This csv contains the simulated equivalents to the predictions we are most concerned with:

In [None]:
# Register the long-term-mean csv as observations in the "forecasts" group;
# rows are identified by the "quantity" column. (The csv used to be read into
# `df` first, but that frame was overwritten right away and never used.)
df = pf.add_observations(
    "swgw-longterm-means.csv",
    index_cols="quantity",
    prefix="forecasts",
    obsgp="forecasts",
    ofile_sep=",",
)
# Show the index of the dataframe returned by add_observations.
print(df.index.to_list())

Check your working directory to ensure that an instruction file was created

Now lets gather up all the output timeseries csv files we want to have as observations:

In [None]:
# File-name prefixes of the output csv files to use as observations, in the
# order their files should appear in the final list.
obs_csv_prefixes = (
    "sv.gwf",
    "sv.lake",
    "sv.riv",
    "sv.sfr",
    "sv.spring",
    "sv-budget",
)
obs_csv_files = []
for obs_csv_prefix in obs_csv_prefixes:
    # One directory listing per prefix keeps the files grouped by prefix.
    obs_csv_files.extend(
        fname
        for fname in os.listdir(pf.new_d)
        if fname.startswith(obs_csv_prefix) and fname.endswith(".csv")
    )
obs_csv_files

Loop over them and add each one to the interface

In [None]:
for obs_csv_file in obs_csv_files:
    print(obs_csv_file)
    # The csv is read here only to discover which columns it contains.
    csv_path = os.path.join(pf.new_d, obs_csv_file)
    df = pd.read_csv(csv_path, index_col=0)
    # Dots in the file name become dashes in the observation prefix.
    prefix = obs_csv_file.replace(".", "-")
    odf = pf.add_observations(
        obs_csv_file,
        index_cols="datetime",
        use_cols=list(df.columns),
        prefix=prefix,
        ofile_sep=",",
    )
    print(odf)

Now some parameters.  Start with hk - the ole classic.  Find all of the HK input arrays that mf6 is using.  These files contain the string "sv.npf_k_layer"

In [None]:
# External HK array files in the working directory.
k_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.npf_k_layer")]
# There must be exactly one HK file per model layer.
assert len(k_files) == gwf.dis.nlay.data

In [None]:
# os.listdir returns names in arbitrary order; sort because the cells below
# pick layers by list position (k_files[:2], k_files[3:], ...).
k_files.sort()
k_files

We need to define some spatial correlation functions/information for the pilot points (for both interpolation from pilot points to the grid and also for the Prior covariance).  We will use a different correlation function for each property type:

In [None]:
# Exponential variogram with range parameter a=10000 for HK.
pp_v_k = pyemu.geostats.ExpVario(contribution=1.0, a=10000)
# transform="log": the correlation structure is expressed in log space.
pp_geostruct_k = pyemu.geostats.GeoStruct(variograms=pp_v_k, transform="log")
pp_geostruct_k.plot()

Now define variograms and geostructs for k33, ss, and sy using the same naming scheme:


In [None]:
# Same construction as for HK, with a different variogram range per property.
pp_v_k33 = pyemu.geostats.ExpVario(contribution=1.0, a=5000)
pp_geostruct_k33 = pyemu.geostats.GeoStruct(variograms=pp_v_k33, transform="log")
pp_v_ss = pyemu.geostats.ExpVario(contribution=1.0, a=15000)
pp_geostruct_ss = pyemu.geostats.GeoStruct(variograms=pp_v_ss, transform="log")
pp_v_sy = pyemu.geostats.ExpVario(contribution=1.0, a=7000)
# sy is the only property left untransformed; the sy parameters added later
# also use transform="none".
pp_geostruct_sy = pyemu.geostats.GeoStruct(variograms=pp_v_sy, transform="none")

We will treat HK in layer 1 and 2 as same quantity - they will share pilot point multiplier parameters:

In [None]:
# Pilot-point multipliers for the first two HK arrays; both files are passed
# in a single call so that they share one set of parameters.
df = pf.add_parameters(
    k_files[:2],
    par_type="pilotpoints",
    pp_options={"pp_space": 3},  # NOTE(review): presumably pilot-point spacing in cells - confirm
    lower_bound=0.1,
    upper_bound=10.0,
    geostruct=pp_geostruct_k,
    par_name_base="hk-pp-wt",
    pargp="hk-pp-wt",
)

In [None]:
df.head()

Notice the bound information being passed - this will be used to define the prior distribution later...

Since the pilot points are designed to accommodate spatial heterogeneity, let's also include a layer-constant parameter to help sample a wider range of HK values.  Tag this parameter with "hk-cn-wt" for "constant HK in the water table aquifer":

In [None]:
# A single constant multiplier applied to the same two HK arrays, on top of
# the pilot-point multipliers.
df = pf.add_parameters(
    k_files[:2],
    par_type="constant",
    lower_bound=0.1,
    upper_bound=10.0,
    par_name_base="hk-cn-wt",
    pargp="hk-cn-wt",
)
df

Do the same for HK in layers 4 and 5 together

In [None]:
# Pilot-point multipliers shared by the HK arrays from index 3 onward
# (k_files[2] is not parameterized for HK).
df = pf.add_parameters(
    k_files[3:],
    par_type="pilotpoints",
    pp_options={"pp_space": 3},
    lower_bound=0.1,
    upper_bound=10.0,
    geostruct=pp_geostruct_k,
    par_name_base="hk-pp-aq",
    pargp="hk-pp-aq",
)
# Constant multiplier for the same arrays.
# NOTE(review): upper_bound=1.0 differs from the 10.0 used for every other HK
# multiplier in this notebook - confirm this is intended and not a typo for 10.0.
df = pf.add_parameters(
    k_files[3:],
    par_type="constant",
    lower_bound=0.10,
    upper_bound=1.0,
    par_name_base="hk-cn-aq",
    pargp="hk-cn-aq",
)

To let us see the actual HK array that mf6 sees, let's add that array as a set of observations also:

In [None]:
# Track the first and last HK arrays as observations.
for k_file in (k_files[0], k_files[-1]):
    print(k_file)
    # Second dot-separated token of the file name, with "_" swapped for "-",
    # serves as both the group name and the name prefix.
    k_tag = k_file.split(".")[1].replace("_", "-")
    pf.add_observations(k_file, obsgp=k_tag, prefix=k_tag)

Set up a similar scheme of parameters for K33.  First find all of the K33 array files (tagged with "sv.npf_k33_layer")

In [None]:
# External K33 array files in the working directory.
k33_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.npf_k33_layer")]
# Validate the K33 list itself (the check used to test k_files, so a missing
# K33 file would have gone unnoticed until the positional lookup below).
assert len(k33_files) == gwf.dis.nlay.data
# os.listdir order is arbitrary; sort because k33_files[2] is used by position.
k33_files.sort()

Now setup pilot points and constant parameters for these arrays in a similar way that we did for K, but focusing on K33 of model layer 3 (the semi-confining unit):

In [None]:
# Pilot-point multipliers for the third K33 array only; note the wider
# bounds (0.01 - 100) compared with the HK pilot points.
df = pf.add_parameters(
    k33_files[2],
    par_type="pilotpoints",
    pp_options={"pp_space": 3},
    lower_bound=0.01,
    upper_bound=100.0,
    geostruct=pp_geostruct_k33,
    par_name_base="k33-pp-conf",
    pargp="k33-pp-conf",
)
# Constant multiplier for the same array.
df = pf.add_parameters(
    k33_files[2],
    par_type="constant",
    lower_bound=0.1,
    upper_bound=10.0,
    par_name_base="k33-cn-conf",
    pargp="k33-cn-conf",
)

Now add the layer 3 K33 array as observations so we can monitor those values:

In [None]:
# Group name and name prefix both come from the file name:
# second dot-separated token, with "_" swapped for "-".
k33_tag = k33_files[2].split(".")[1].replace("_", "-")
pf.add_observations(k33_files[2], obsgp=k33_tag, prefix=k33_tag)

And SS and sy (in layer 1 only).  Same as before: find the SS arrays (tagged with "sv.sto_ss")


In [None]:
# External SS array files in the working directory.
ss_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.sto_ss")]
assert len(ss_files) == gwf.dis.nlay.data
# os.listdir order is arbitrary; the cells below select layers by list
# position (ss_files[:2], ss_files[2], ss_files[3:]), so the list must be
# sorted - just like k_files, k33_files and sy_files are.
ss_files.sort()

Add SS pilot points and constants for each model layer.  Use an upper bound and lower bound that give us lots of flexibility to fit data...also add those SS arrays as observations

In [None]:
# (files, tag, constant lower bound, constant upper bound) for each group of
# SS arrays. The pilot-point bounds are the same (0.05 - 20) for every group;
# only the bounds of the constant multiplier differ.
ss_zones = [
    (ss_files[:2], "wt", 0.1, 10.0),
    (ss_files[2], "conf", 0.05, 20.0),
    (ss_files[3:], "aq", 0.1, 10.0),
]
for zone_files, zone_tag, cn_lower, cn_upper in ss_zones:
    pp_name = "ss-pp-{0}".format(zone_tag)
    cn_name = "ss-cn-{0}".format(zone_tag)
    # Pilot-point multipliers first ...
    df = pf.add_parameters(
        zone_files,
        par_type="pilotpoints",
        pp_options={"pp_space": 3},
        lower_bound=0.05,
        upper_bound=20.0,
        geostruct=pp_geostruct_ss,
        par_name_base=pp_name,
        pargp=pp_name,
    )
    # ... then the constant multiplier for the same files.
    df = pf.add_parameters(
        zone_files,
        par_type="constant",
        lower_bound=cn_lower,
        upper_bound=cn_upper,
        par_name_base=cn_name,
        pargp=cn_name,
    )

# Track one SS array per group as observations; group name and prefix are the
# second dot-separated token of the file name with "_" swapped for "-".
ss_obs_dfs = []
for ss_file in (ss_files[0], ss_files[2], ss_files[-1]):
    ss_tag = ss_file.split(".")[1].replace("_", "-")
    ss_obs_dfs.append(pf.add_observations(ss_file, obsgp=ss_tag, prefix=ss_tag))
# Display the result of the last call, as the cell did before.
ss_obs_dfs[-1]

Now sy - just in layer 1 tho.  And we need to be more conservative with the parameter bounds so that we don't get unrealistically high sy values

In [None]:
# External sy array files in the working directory, one per layer.
sy_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.sto_sy")]
assert len(sy_files) == gwf.dis.nlay.data
sy_files.sort()
# Only the first file after sorting is parameterized ...
sy_file = sy_files[0]
# ... and it must be the layer-1 array.
assert "layer1" in sy_file

In [None]:
# Pilot-point multipliers for layer-1 sy, with much tighter bounds than the
# other properties and no log transform.
df = pf.add_parameters(
    sy_file,
    par_type="pilotpoints",
    pp_options={"pp_space": 3},
    lower_bound=0.6,
    upper_bound=1.4,
    geostruct=pp_geostruct_sy,
    par_name_base="sy-pp-wt",
    pargp="sy-pp-wt",
    ult_ubound=1.0,  # NOTE(review): presumably caps the resulting sy value (not the multiplier) at 1.0 - confirm
    transform="none",
)
# Constant multiplier for the same array, limited to +/-10%.
df = pf.add_parameters(
    sy_file,
    par_type="constant",
    lower_bound=0.9,
    upper_bound=1.1,
    par_name_base="sy-cn-wt",
    pargp="sy-cn-wt",
    transform="none",
)
# Track the sy array as observations; group name and prefix are the second
# dot-separated token of the file name with "_" swapped for "-".
pf.add_observations(
    sy_file,
    obsgp=sy_file.split(".")[1].replace("_", "-"),
    prefix=sy_file.split(".")[1].replace("_", "-"),
)

Set up some parameters for the pumping wells - we aren't going to adjust these (don't we all have perfect historic water use data?!), but we will use them as decision variables later

Now find any remaining wel/maw files:

In [None]:
# External well list files in the working directory.
wel_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.wel_stress")]
# The well parameters are needed later, so at least one file must exist.
assert len(wel_files) > 0
wel_files.sort()

And add parameters for them:

In [None]:
wel_files

In [None]:
for wel_file in wel_files:
    # The trailing number in the file name, minus one, gives the kper value.
    kper = int(wel_file.split(".")[1].split("_")[-1]) - 1
    # The file that maps to kper 0 gets no parameters.
    if kper == 0:
        continue
    wel_tag = "welrate_kper:{0}".format(kper)
    # One multiplier (par_style="m") per row of the list file; columns 0-2
    # identify the row and column 3 holds the value being multiplied.
    pf.add_parameters(
        wel_file,
        par_type="grid",
        par_style="m",
        par_name_base=wel_tag,
        pargp=wel_tag,
        index_cols=[0, 1, 2],
        use_cols=[3],
        mfile_skip=0,
        mfile_fmt="free",
        lower_bound=0.0,
        upper_bound=3.0,
        transform="none",
    )

Add some recharge parameters for the base model - this is to try to account for the uncertainty that has been introduced through simplification...if we are using uzf, then add some small uncertainties for precip/infilt:

In [None]:
# External recharge list files in the working directory.
rech_files = [
    f for f in os.listdir(pf.new_d) if f.startswith("sv.rch_stress_period_data_")
]
# There must be exactly one recharge file per stress period.
assert len(rech_files) == sim.tdis.nper.data
rech_files.sort()

# One global multiplier shared by every stress period (+/-10%) ...
df = pf.add_parameters(
    rech_files,
    par_type="constant",
    lower_bound=0.9,
    upper_bound=1.1,
    par_name_base="rech_global",
    pargp="rech_global",
    transform="none",
    index_cols=[0, 1, 2],
    use_cols=[3],
    mfile_skip=0,
)

# ... plus an independent multiplier for each stress period (+/-30%).
# NOTE(review): every call reuses par_name_base="rech"; presumably PstFrom
# disambiguates repeated names itself - confirm. (The `kper` value that used to
# be computed here was never used and has been removed.)
for rech_file in rech_files:
    print(rech_file)
    df = pf.add_parameters(
        rech_file,
        par_type="constant",
        lower_bound=0.7,
        upper_bound=1.3,
        par_name_base="rech",
        pargp="rech",
        transform="none",
        index_cols=[0, 1, 2],
        use_cols=[3],
        mfile_skip=0,
    )

In [None]:
# Parameterize whichever stream representation this model set contains: the
# SFR package data file if it exists, otherwise the RIV stress-period files.
sfr_file = "sv.sfr_packagedata.txt"
if os.path.exists(os.path.join(pf.new_d, sfr_file)):
    # Single constant multiplier on column 9 of the SFR package data.
    pf.add_parameters(
        sfr_file,
        index_cols=[0, 1, 2],
        use_cols=[9],
        pargp="sfrhk",
        par_name_base="sfrhk",
        lower_bound=0.1,
        upper_bound=10,
        par_type="constant",
        mfile_skip=0,
    )
else:
    riv_files = [f for f in os.listdir(pf.new_d) if "riv_stress" in f]
    print(riv_files)
    # Constant multiplier on column 4 of every RIV file.
    pf.add_parameters(
        riv_files,
        index_cols=[0, 1, 2],
        use_cols=[4],
        pargp="rivcond",
        par_name_base="rivcond",
        lower_bound=0.1,
        upper_bound=10,
        par_type="constant",
        mfile_skip=0,
    )
    # Column 5 gets an additive parameter (par_style="a") instead of a
    # multiplier: it starts at 0.0 and is allowed to shift by +/-0.5.
    pf.add_parameters(
        riv_files,
        index_cols=[0, 1, 2],
        use_cols=[5],
        pargp="rivstage",
        par_name_base="rivstage",
        lower_bound=-0.5,
        upper_bound=0.5,
        par_type="constant",
        mfile_skip=0,
        par_style="a",
        transform="none",
        initial_value=0.0,
    )

Now build the interface and the control file:

In [None]:
pf.build_pst(filename="pest.pst")

Go to the `working_d` and see what has happened

Check the `obsval` quantities in the "* observation data" section - what are those numbers:

In [None]:
obs = pf.pst.observation_data
obs.obsval

So if the `obsval` values are all the existing model output values, then if we run the model again just the same way, we should have a phi of zero - a great check! Let's do that

In [None]:
# noptmax = 0 for the check run described above (run the model once, unchanged).
pf.pst.control_data.noptmax = 0

In [None]:
pf.pst.write(os.path.join(pf.new_d, "pest.pst"), version=2)

Use the `pyemu.os_utils.run()` function to run pestpp-ies with our new control file in the `working_d`

In [None]:
pyemu.os_utils.run("pestpp-ies pest.pst", cwd=pf.new_d)

Use the `Pst.set_res()` method to point to the "pest.base.rei" file and check the phi value:

In [None]:
# Attach the residuals written by the run above so that phi can be evaluated.
pf.pst.set_res(os.path.join(pf.new_d, "pest.base.rei"))
pf.pst.phi

In [None]:
assert pf.pst.phi < 1.0e-2

Now find the subset of parameters that have "wel" in the parnme (ie the name):

In [None]:
par = pf.pst.parameter_data

In [None]:
# Rows of the parameter table whose name contains "wel"; the well parameters
# above were all added with a "welrate_kper:..." name base.
wellpars = par.loc[par.parnme.str.contains("wel"), :]
# Guard against the name filter silently matching nothing.
assert len(wellpars) > 0

Mark their "partrans" as "fixed":

In [None]:
# Writes into `par`, i.e. pf.pst.parameter_data, so the change ends up in the
# control file the next time it is written.
par.loc[wellpars.parnme, "partrans"] = "fixed"

Now generate a Prior parameter ensemble (which uses the parameter bound and geostat info we passed to `PstFrom` above) via the `PstFrom.draw()` method.   Generate 1000 realizations

In [None]:
# Seed numpy's global random generator first so that the prior ensemble is the
# same on every Restart & Run All; pyemu.en.SEED is pyemu's own seed constant.
np.random.seed(pyemu.en.SEED)
pe = pf.draw(num_reals=1000)

We need to enforce parameter bounds on those realizations, save it and add an arg to the control file to tell ies to use it:

In [None]:
# The result is not assigned, so this relies on enforce() modifying `pe` in place.
pe.enforce()

Now save the parameter ensemble to a file in the `working_d` and tell pestpp-ies to use it

In [None]:
# The file name given to ies has no directory part; the csv is written into
# the working directory, next to the control file.
pe.to_csv(os.path.join(pf.new_d, "prior.csv"))
pf.pst.pestpp_options["ies_par_en"] = "prior.csv"

Save the control file one more time...

In [None]:
pf.pst.write(os.path.join(pf.new_d, "pest.pst"), version=2)

## Set observation values, weights and noise

So far, the control file only has simulated outputs and weights of 1 for everything.  

Load the actual "observation data"

In [None]:
# obs_csv_fname = os.path.join(
#     "..", "models", "synthetic-valley-truth-advanced-monthly", "raw_obs.csv"
# )
# assert os.path.exists(obs_csv_fname)
# obsdf = pd.read_csv(obs_csv_fname, index_col=0, parse_dates=True)
# obsdf

Models are always going to be low-pass filters compared to the complex natural systems that generated the observations.  So it's usually a good idea to filter out high-frequency signal components.  Apply a rolling mean to each timeseries using the `window` parameter we defined earlier:

In [None]:
# smoothed = obsdf.rolling(window=window, center=True, min_periods=1).mean()
# for col in smoothed.columns:
#     fig, ax = plt.subplots(1, 1, figsize=(6, 3))
#     smoothed.loc[smoothed.index.year < 2015, col].plot(ax=ax, c="g")

#     obsdf.loc[obsdf.index.year < 2015, col].plot(ax=ax, c="m")

#     ax.set_title(col, loc="left")

# plt.show()

In [None]:
# smoothedlow = obsdf.rolling(window=window, center=True, min_periods=1).quantile(0.65)
# for col in smoothedlow.columns:
#     if "riv-flow" not in col:
#         continue
#     fig, ax = plt.subplots(1, 1, figsize=(6, 3))
#     smoothedlow.loc[smoothedlow.index.year < 2015, col].plot(ax=ax, c="g")

#     obsdf.loc[obsdf.index.year < 2015, col].plot(ax=ax, c="m")

#     ax.set_title(col, loc="left")

# plt.show()

In [None]:
# smoothed["riv-flow"] = smoothedlow["riv-flow"]

Now load the control file:

In [None]:
# pst = pyemu.Pst(os.path.join(working_d, "pest.pst"))

In [None]:
# obs = pst.observation_data

In [None]:
# obs.columns

Now for the tricky part: we need to find each simulated output that we have an observed counterpart for.  In practice, this usually requires some bespoke code/hackery

In [None]:
# nnobs = obs.loc[pd.notna(obs.usecol), :]

In [None]:
# prefixes = [
#     "wt",
#     "aq",
#     "lake-stage",
#     "lake-swgw",
#     "riv-flow",
#     "riv-swgw",
#     "diff1",
#     "diff0",
# ]
# for prefix in prefixes:
#     uobs = nnobs.loc[nnobs.usecol.str.contains(prefix), :].copy()
#     print(prefix, uobs.shape)
#     uobs["datetime"] = pd.to_datetime(uobs.datetime)
#     for usecol in uobs.usecol.unique():
#         print(usecol)
#         uuobs = uobs.loc[uobs.usecol == usecol, :].copy()
#         for dt, name in zip(uuobs.datetime, uuobs.obsnme):
#             oval = smoothed.loc[dt, usecol]
#             obs.loc[name, "obsval"] = oval

now we need to set the weights and expected noise for each observation datum:

In [None]:
obs = pf.pst.observation_data

In [None]:
# Start from a blank slate: every observation gets zero weight and no noise
# or bound information.
# NOTE: the cells that would assign non-zero weights afterwards are commented
# out in this notebook, so these values are what gets written to the control file.
obs["weight"] = 0.0
obs["standard_deviation"] = np.nan
obs["lower_bound"] = np.nan
obs["upper_bound"] = np.nan

In this block, the weights and noise for each observation is defined...do you agree with these values?

In [None]:
# obs_dict = {}
# hist_prefixes = ["wt", "aq", "lake-stage", "diff1", "diff0"]
# for prefix in hist_prefixes:
#     uobs = nnobs.loc[nnobs.usecol.str.startswith(prefix), :].copy()
#     print(uobs.usecol.unique())
#     uobs["datetime"] = pd.to_datetime(uobs.datetime)
#     hist_uobs = uobs.loc[uobs.datetime.dt.year < 2015, :]
#     obs.loc[hist_uobs.obsnme, "datetime"] = hist_uobs.datetime
#     if "lake" in prefix:
#         obs.loc[hist_uobs.obsnme, "weight"] = 5.0
#         obs.loc[hist_uobs.obsnme, "standard_deviation"] = 0.2
#     elif "diff" in prefix:
#         print(prefix)
#         obs.loc[hist_uobs.obsnme, "weight"] = [
#             3.0 if oval > 0.1 else 3.0 for oval in np.abs(hist_uobs.obsval)
#         ]
#         obs.loc[hist_uobs.obsnme, "standard_deviation"] = [
#             max(0.01, oval * 0.25) for oval in np.abs(hist_uobs.obsval)
#         ]
#         obs.loc[hist_uobs.obsnme, "lower_bound"] = 0.0
#     else:
#         obs.loc[hist_uobs.obsnme, "weight"] = 2.0
#         obs.loc[hist_uobs.obsnme, "standard_deviation"] = 0.5

if this is an advanced model, we can also use riv-flow information for history matching (one benefit of a more complex model) 

In [None]:
# if "riv-flow" in obs.usecol.unique():
#     uobs = nnobs.loc[nnobs.usecol == "riv-flow", :].copy()
#     uobs["datetime"] = pd.to_datetime(uobs.datetime)
#     hist_uobs = uobs.loc[uobs.datetime.dt.year < 2015, :].copy()
#     hist_uobs["standard_deviation"] = [
#         max(0.2, oval * 0.2) for oval in np.abs(hist_uobs.obsval.values)
#     ]
#     hist_uobs.loc[hist_uobs.obsnme, "weight"] = 1 / hist_uobs.standard_deviation.values
#     obs.loc[hist_uobs.obsnme, "standard_deviation"] = (
#         hist_uobs.standard_deviation.values
#     )
#     obs.loc[hist_uobs.obsnme, "weight"] = hist_uobs.weight.values
#     obs.loc[hist_uobs.obsnme, "datetime"] = hist_uobs.datetime

#     print(hist_uobs.loc[:, ["obsval", "standard_deviation", "weight"]])

Remember that we dont want sy to get too large?  Let's tell ies about that.  First find all observations with "sto-sy" in the name

In [None]:
# syobs = obs.loc[obs.obsnme.str.contains("sto-sy"), :]
# syobs.head()

Now give them all a `weight` of 1, an `obgnme` of "less_than_sy", and an `obsval` of 0.3.  This will set up inequality observations to (try to) keep sy less than 0.3

In [None]:
# obs.loc[syobs.obsnme, "weight"] = 1.0
# obs.loc[syobs.obsnme, "obgnme"] = "less_than_sy"
# obs.loc[syobs.obsnme, "obsval"] = 0.3

In [None]:
# pst.nnz_obs_groups

In [None]:
# helpers.final_steps(pst)

Set noptmax to 0, save the control file and do a test run:

In [None]:
# Write the control file with the updated observation data and do a test run.
# NOTE(review): with the weight-setting cells commented out, every observation
# still has weight 0.0 here - confirm pestpp-ies accepts that for this run.
pf.pst.control_data.noptmax = 0
pf.pst.write(os.path.join(working_d, "pest.pst"), version=2)
pyemu.os_utils.run("pestpp-ies pest.pst", cwd=working_d)

Now we are going to generate some autocorrelated timeseries noise to use in the history matching.  We are going to use a (very) long correlation length to express that we are more interested in low-frequency noise/error than high frequency

In [None]:
autocorrelation_length_days = 365 * 5

In [None]:
# nzobs = obs.loc[obs.weight > 0, :].copy()
# obs["distance"] = np.nan
# grps = nzobs.obgnme.unique()
# grps.sort()
# struct_dict = {}
# for grp in grps:
#     if "less_than_sy" in grp:
#         continue
#     gobs = nzobs.loc[nzobs.obgnme == grp, :].copy()
#     gobs["datetime"] = pd.to_datetime(gobs.datetime)
#     gobs["distance"] = (gobs.datetime - gobs.datetime.min()).dt.days
#     obs.loc[gobs.obsnme, "distance"] = gobs.distance
#     v = pyemu.geostats.ExpVario(contribution=1.0, a=autocorrelation_length_days)
#     gs = pyemu.geostats.GeoStruct(variograms=v, name=grp)
#     names = gobs.obsnme.to_list()
#     names.sort()
#     struct_dict[gs] = names

In [None]:
# struct_dict

Set a random seed to make sure we are getting the same draws:

In [None]:
# np.random.seed(pyemu.en.SEED)

Generate an obs+noise ensemble using `pyemu.helpers.autocorrelated_draw()`.  Draw 1000 realizations and enforce bounds

In [None]:
# noise = pyemu.helpers.autocorrelated_draw(
#     pst, struct_dict, num_reals=1000, verbose=True, enforce_bounds=True
# )

Explore the `noise` ensemble:


In [None]:
# noise.loc[:, pst.nnz_obs_names]

In [None]:
# wtobs = obs.loc[obs.obsnme.str.contains("wt"),:]
# usecols = wtobs.usecol.unique()
# usecols.sort()
# for usecol in usecols:
#     wwtobs = wtobs.loc[wtobs.usecol==usecol,:]
#     usecol = usecol.replace("wt","aq")
#     aqobs = obs.loc[obs.usecol==usecol,:]
#     assert aqobs.shape[0] > 0
#     usecol = usecol.replace("aq","diff")
#     dfobs = obs.loc[obs.usecol==usecol,:]
#     assert dfobs.shape[0] > 0
#     #print(noise.loc[:,dfobs.obsnme])
#     noise.loc[:,dfobs.obsnme] = noise.loc[:,wwtobs.obsnme].values - noise.loc[:,aqobs.obsnme].values
#     #print(noise.loc[:,dfobs.obsnme])



Save the noise ensemble, tell ies about it, and do an noptmax=-2 test run:

In [None]:
# noise.to_csv(os.path.join(working_d, "noise.csv"))
# pst.pestpp_options = {"ies_par_en": pst.pestpp_options["ies_par_en"]}
# pst.pestpp_options["ies_obs_en"] = "noise.csv"
# pst.control_data.noptmax = -2
# pst.write(os.path.join(working_d, "pest.pst"), version=2)
# pyemu.os_utils.run("pestpp-ies pest.pst", cwd=working_d)

As you can see from the phi group summary, we need some rebalanced weights.  One way to do this is the `ies_phi_factor_file` option.  First define a `dict` of string-tags:phi fractions we want:


In [None]:
# helpers.plot_ies_timeseries(working_d)

In [None]:
# phi_factors = {"lake": 0.15, "aq": 0.3, "wt": 0.3, "diff": 0.25, "less_than_sy": -999}
# if "riv-flow" in obs.usecol.unique():
#     phi_factors = {
#         "lake": 0.1,
#         "aq": 0.3,
#         "wt": 0.3,
#         "diff": 0.2,
#         "riv-flow": 0.1,
#         "less_than_sy": -999,
#     }

Write this information to a two-column csv file (no header!) in the `working_d`:

In [None]:
# ser = pd.Series(phi_factors)
# ser.to_csv(os.path.join(working_d, "phi_facs.csv"), index=True, header=False)

Now tell pestpp-ies to use it, re-write the control file and run pestpp-ies again:

In [None]:
# pst.pestpp_options["ies_phi_factor_file"] = "phi_facs.csv"
# pst.write(os.path.join(working_d, "pest.pst"), version=2)
# pyemu.os_utils.run("pestpp-ies pest.pst", cwd=working_d)

In [None]:
# if run_prior_mc:
#     master_d = "master_prior_mc"
#     pst.pestpp_options["ies_num_reals"] = 50
#     pst.control_data.noptmax = -1
#     pst.write(os.path.join(working_d, "pest.pst"), version=2)
#     pyemu.os_utils.start_workers(
#         pf.new_d,
#         "pestpp-ies",
#         "pest.pst",
#         worker_root=".",
#         num_workers=num_workers,
#         master_dir=master_d,
#     )
#     pst = pyemu.Pst(os.path.join(master_d, "pest.pst"))
#     helpers.plot_ies_forecasts(master_d)
#     helpers.plot_ies_timeseries(master_d)
#     tags = [
#         "npf-k-layer1",
#         "npf-k-layer5",
#         "npf-k33-layer3",
#         "sto-ss-layer1",
#         "sto-sy-layer1",
#     ]
#     for tag in tags:
#         helpers.plot_ies_properties(master_d, tag)