# Run PESTPP-OPT 

In [None]:
%matplotlib inline
import os
import shutil
import numpy as np
import pandas as pd
import pyemu
import matplotlib.pyplot as plt

In [None]:
# Number of parallel workers handed to pyemu.os_utils.start_workers further down.
num_workers = 15
# Directory used as both the master directory and the worker root for the PESTPP-OPT run.
master_d = "master_opt"

The existing master ies directory - needed so we can get the calibrated par values

In [None]:
# Master directory of a previous PESTPP-IES run; it is copied to form the working directory.
ies_d = "master_ies"
# Fail right away if the IES run has not been done yet.
assert os.path.exists(ies_d)

Define the working directory where the opt-modified control file will be written:

In [None]:
# Template directory for the optimization run.
working_d = "model_and_pest_files_opt"
# Remove any copy left over from a previous execution so the cell can be re-run.
if os.path.exists(working_d):
    shutil.rmtree(working_d)
# Start from a full copy of the IES master directory.
shutil.copytree(ies_d, working_d)

Load the control file

In [None]:
# Load the control file that was copied over from the IES master directory.
pst = pyemu.Pst(os.path.join(working_d, "pest.pst"))
# Sanity checks on the observation counts (nnz_obs is pyemu's count of
# non-zero-weight observations): some observations must be zero-weighted
# and at least one must carry weight.
assert pst.nobs > pst.nnz_obs
assert pst.nnz_obs > 0

First we need to modify the parameter data section to identify which parameters will be adjusted during optimization - we call these decision variables:

In [None]:
# Short handle on the control file's parameter data table; later cells edit
# it through `par` before the control file is written.
par = pst.parameter_data

Get the posterior parameter ensemble and look for the "base" realization

In [None]:
# Largest iteration number found in the IES phi-actual results; used below to
# select which parameter/observation ensembles to load.
post_iter = pst.ies.phiactual.iteration.max()

In [None]:
# Parameter ensemble at iteration `post_iter`, displayed for inspection.
pe = pst.ies.get("paren", post_iter)
pe

In [None]:
# When the ensemble carries a realization labelled "base", adopt its
# (rounded) values as the parameter values in the control file.
# If it is absent, the control file values are left untouched.
if "base" in pe.index:
    print("using base realization")
    _base_parvals = pe.loc["base", :].round(6).values
    par.loc[pe.columns, "parval1"] = _base_parvals

Find all parameters with "wel" in the name:

In [None]:
# All parameters whose name contains "wel" (working on a copy so `par` is not altered here).
_is_wel_par = par.parnme.str.contains("wel")
wpar = par.loc[_is_wel_par, :].copy()
wpar["kper"] = wpar["kper"].astype(int)

# The earliest stress period among the "pred" well parameters marks the start
# of the period from which parameters are treated as decision variables.
_is_pred_named = wpar.parnme.str.contains("pred")
start_pred_kper = wpar.loc[_is_pred_named, "kper"].min()

# Every well parameter at or after that stress period is a decision variable.
pred_wpar = wpar.loc[wpar.kper >= start_pred_kper, :]
assert pred_wpar.shape[0] > 0
print(pred_wpar.shape[0], "decision variables")
pred_wpar.head()

Mark these well parameters as "none" transformed and modify their derivative calculation quantities to make sure we are using large enough perturbation increments:

In [None]:
# Put every decision variable in its own untransformed group with fixed bounds.
# The settings are applied one column at a time, in this order.
_decvar_settings = {
    "partrans": "none",
    "pargp": "decvar",
    "parubnd": 1.5,
    "parlbnd": 0.0,
}
for _col, _setting in _decvar_settings.items():
    par.loc[pred_wpar.parnme, _col] = _setting

# Parameters with "pred" in their name start the optimization at zero.
predwel_wpar = wpar.loc[wpar.parnme.str.contains("pred"), "parnme"]
assert len(predwel_wpar) > 0
par.loc[predwel_wpar, "parval1"] = 0.0  # set the pred well rates to zero initially

# Make sure the new "decvar" group exists in the parameter group table, then
# give it an absolute derivative increment of 0.2.
pst.rectify_pgroups()
pst.parameter_groups.loc["decvar", "derinc"] = 0.2
pst.parameter_groups.loc["decvar", "inctyp"] = "absolute"

# Tell PESTPP-OPT which parameter group holds the decision variables.
pst.pestpp_options["opt_dec_var_groups"] = "decvar"

Now modify the observation data section: set all obs to zero weight, then find constraints and set them accordingly

In [None]:
obs = pst.observation_data
# Start with every observation switched off; constraints are re-enabled individually.
obs["weight"] = 0.0

# Find the river sw-gw exchange difference observation and prefer it to be 0
# so that historical and future exchange are equal.
_is_forecast = obs.obsnme.str.contains("forecast")
_is_diff = obs.obsnme.str.contains("diff")
_is_riv_swgw = obs.obsnme.str.contains("riv-swgw")
fobs = obs.loc[_is_forecast & _is_diff & _is_riv_swgw, :]
assert fobs.shape[0] == 1  # exactly one such observation is expected

# Value of that observation in the "base" realization of the observation
# ensemble; only referenced by the alternative constraint value noted below.
cal_diff = pst.ies.get("obsen", post_iter).loc["base", fobs.obsnme]

# Turn the observation into a weighted constraint in the "greater_than" group.
obs.loc[fobs.obsnme, "weight"] = 1.0
obs.loc[fobs.obsnme, "obsval"] = 0  # cal_diff * 0.5
obs.loc[fobs.obsnme, "obgnme"] = "greater_than"

Find the sum of future gw pumping observation - this will be our objective function that we want to maximize:

In [None]:
# Locate the single observation whose name contains "forecast", "pred" and "wel".
_obj_mask = (
    obs.obsnme.str.contains("forecast")
    & obs.obsnme.str.contains("pred")
    & obs.obsnme.str.contains("wel")
)
wobs = obs.loc[_obj_mask, :]
assert wobs.shape[0] == 1

# This observation is not given a weight or a constraint group here; its name
# is passed to PESTPP-OPT as the objective function in a later cell.
obj_name = wobs.obsnme.values[0]
obj_name

We also need to make sure that during each future stress period, we are meeting the long-term historic production rates

In [None]:
# Observations whose name contains both "bud" and "pwell--out" (copied so the
# datetime conversion does not touch the control file's table).
_is_bud = obs.obsnme.str.contains("bud")
_is_pwell_out = obs.obsnme.str.contains("pwell--out")
wfobs = obs.loc[_is_bud & _is_pwell_out, :].copy()
wfobs["datetime"] = pd.to_datetime(wfobs["datetime"])

# Split at the start of 2015: earlier rows are "historical", the rest "predictive".
_obs_year = wfobs.datetime.dt.year
hist_wfobs = wfobs.loc[_obs_year < 2015, :]
pred_wfobs = wfobs.loc[_obs_year >= 2015, :]

# Largest historical value in the "base" realization of the observation ensemble.
_base_hist_vals = pst.ies.get("obsen", post_iter).loc["base", hist_wfobs.obsnme]
hist_max = _base_hist_vals.max()
hist_max

In [None]:
# Display the pre-2015 "pwell--out" budget observations.
hist_wfobs

In [None]:
# Display the "pwell--out" budget observations from 2015 onward.
pred_wfobs

In [None]:
# Turn each 2015-and-later "pwell--out" observation into a weighted constraint
# in the "greater_than" group, with 90% of the historical maximum as its value.
obs.loc[pred_wfobs.obsnme, "weight"] = 1.0
obs.loc[pred_wfobs.obsnme, "obsval"] = hist_max * 0.9
obs.loc[pred_wfobs.obsnme, "obgnme"] = "greater_than"

Set `noptmax` to 1 and identify the objective function

In [None]:
# A single optimization iteration.
pst.control_data.noptmax = 1
# The objective function is the observation found above, and it is maximized.
pst.pestpp_options["opt_objective_function"] = obj_name
pst.pestpp_options["opt_direction"] = "max"

In [None]:
# Overwrite the working copy of the control file with the optimization setup
# (version-2 control file format requested explicitly).
pst.write(os.path.join(working_d, "pest.pst"), version=2)

In [None]:
# Begin with an empty master directory so results of an earlier run cannot linger.
if os.path.exists(master_d):
    shutil.rmtree(master_d)
os.makedirs(master_d)
# Run PESTPP-OPT in parallel from the working directory template.
# Note that `master_d` is passed as both the worker root and the master directory.
pyemu.os_utils.start_workers(
    working_d,
    "pestpp-opt",
    "pest.pst",
    num_workers=num_workers,
    worker_root=master_d,
    master_dir=master_d,
)

# post processing PESTPP-OPT

Check the final and initial objective function value:

In [None]:
# Read the objective-function CSV written by PESTPP-OPT and take the value in
# its first row and first data column (the file's first column is the index).
_iobj_path = os.path.join(master_d, "pest.slp.iobj.csv")
_iobj_df = pd.read_csv(_iobj_path, index_col=0)
opt_phi_value = _iobj_df.values[0][0]
# Show it next to the historical maximum for comparison.
hist_max, opt_phi_value

So we are able to extract more gw but also make river sw-gw exchange have zero change into the future - nice!

Plot the optimal decision variable values:

In [None]:
# Parameter values written by the PESTPP-OPT run, restricted to the "wel" parameters.
_par_file = os.path.join(master_d, "pest.1.par")
_all_par_vals = pyemu.pst_utils.read_parfile(_par_file)
opt_par_vals = _all_par_vals.loc[wpar.parnme]
# One bar per well parameter.
ax = opt_par_vals["parval1"].plot(kind="bar")
ax.grid()

## investigating the response matrix

In [None]:
# Load the binary Jacobian written by the PESTPP-OPT run as a DataFrame
# (used below as the response matrix: rows are indexed by observation name).
rm = pyemu.Matrix.from_binary(os.path.join(master_d, "pest.1.jcb")).to_dataframe()

In [None]:
# Response-matrix row of the objective function observation.
rm.loc[obj_name, :]

In [None]:
# Image of the log10 magnitude of the response-matrix rows for every
# non-zero-weight observation except the first one in pst.nnz_obs_names.
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
_abs_resp = np.abs(rm.loc[pst.nnz_obs_names[1:], :].values)
# An exactly-zero response has no finite log10 (it would give -inf together
# with a divide-by-zero RuntimeWarning). Flag those entries as NaN instead;
# imshow leaves non-finite cells blank either way.
_abs_resp = np.where(_abs_resp > 0, _abs_resp, np.nan)
vals = np.log10(_abs_resp)
cb = ax.imshow(vals)
plt.colorbar(cb, label="log10(|response|)")
# Columns are labelled with the response-matrix column names, rows with the
# corresponding observation names.
ax.set_xticks(np.arange(vals.shape[1]))
ax.set_xticklabels(rm.columns.to_list(), rotation=90)
ax.set_yticks(np.arange(vals.shape[0]))
_ = ax.set_yticklabels(pst.nnz_obs_names[1:])

In [None]:
# Per-column ratio of |response of the first non-zero-weight observation| to
# |response of the objective function|, drawn as a bar chart.
# NOTE(review): presumably the first entry of pst.nnz_obs_names is the river
# sw-gw constraint - confirm, since this relies on observation ordering.
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
_constraint_resp = np.abs(rm.loc[pst.nnz_obs_names[0], :].values)
_objective_resp = np.abs(rm.loc[obj_name, :].values)
cf = _constraint_resp / _objective_resp
_xpos = np.arange(cf.shape[0])
ax.bar(_xpos, cf)
ax.set_xticks(_xpos)
_ = ax.set_xticklabels(rm.columns, rotation=90)

# Reliability

Plot the histogram of the river sw-gw flux difference (the observation we used as a constraint):

In [None]:
# Observation ensemble at iteration `post_iter`.
oe = pst.ies.get("obsen", post_iter)
# Histogram, across realizations, of the river sw-gw difference constraint observation.
ax = oe.loc[:, fobs.obsnme].hist()
plt.tight_layout()

Tell pestpp-opt how to use the uncertainty. We can also take advantage of the linear assumption and reuse the previous results so we don't need to do any additional model runs (!)

In [None]:
# Save the observation ensemble in the working directory and point PESTPP-OPT
# at it as the observation stack.
oe.to_csv(os.path.join(working_d, "obs_stack.csv"))
pst.pestpp_options["opt_obs_stack"] = "obs_stack.csv"
pst.pestpp_options["opt_recalc_chance_every"] = 999
# Reuse the Jacobian from the previous run as the base Jacobian ...
shutil.copy2(
    os.path.join(master_d, "pest.1.jcb"), os.path.join(working_d, "respmat.jcb")
)
pst.pestpp_options["base_jacobian"] = "respmat.jcb"
# ... and its companion residuals file as the hot-start residuals file.
shutil.copy2(
    os.path.join(master_d, "pest.1.jcb.rei"), os.path.join(working_d, "hotstart.rei")
)
pst.pestpp_options["hotstart_resfile"] = "hotstart.rei"
pst.pestpp_options["opt_skip_final"] = True
pst.pestpp_options["opt_risk"] = 0.6


par = pst.parameter_data
# NOTE(review): switches every "fixed" parameter to a log transform; confirm
# this is intended and that all such parameters have positive values.
par.loc[par.partrans == "fixed", "partrans"] = "log"
# NOTE(review): unlike the first write of this control file, no `version=2`
# is passed here - confirm the resulting file format is the one wanted.
pst.write(os.path.join(working_d, "pest.pst"))

Rerun pestpp-opt:

In [None]:
# Single (non-parallel) PESTPP-OPT run, executed inside the working directory.
pyemu.os_utils.run("pestpp-opt pest.pst", cwd=working_d)

Visualize these results the same way we did earlier:

In [None]:
# Same extraction as for the first run, but from the working directory where
# the chance-constrained run was executed.
_reliable_iobj_path = os.path.join(working_d, "pest.slp.iobj.csv")
_reliable_iobj_df = pd.read_csv(_reliable_iobj_path, index_col=0)
reliable_phi_value = _reliable_iobj_df.values[0][0]
# Historical maximum, first-run value and chance-constrained value side by side.
hist_max, opt_phi_value, reliable_phi_value

In [None]:
# Parameter values from the chance-constrained run (read from the working
# directory), restricted to the "wel" parameters and drawn as a bar chart.
_reliable_par_file = os.path.join(working_d, "pest.1.par")
_reliable_par_vals = pyemu.pst_utils.read_parfile(_reliable_par_file)
opt_par_vals = _reliable_par_vals.loc[wpar.parnme]
ax = opt_par_vals["parval1"].plot(kind="bar")
ax.grid()

In [None]:
# Lower the pumping constraint value to 60% of the historical maximum
# and raise the risk setting from 0.6 to 0.9 for another run.
obs.loc[pred_wfobs.obsnme, "obsval"] = hist_max * 0.6
pst.pestpp_options["opt_risk"] = 0.9

In [None]:
# Persist the updated constraint value and risk setting to the working control file.
pst.write(os.path.join(working_d, "pest.pst"))

In [None]:
# Run PESTPP-OPT once more, inside the working directory, with the new settings.
pyemu.os_utils.run("pestpp-opt pest.pst", cwd=working_d)