# Run PESTPP-OPT 

In [None]:
%matplotlib inline
import os
import shutil
import numpy as np
import pandas as pd
import pyemu
import flopy
import matplotlib.pyplot as plt

In [None]:
# number of parallel workers passed to pyemu's start_workers() for the PESTPP-OPT run
num_workers = 15
# directory for the PESTPP-OPT master run (also used below as the worker root)
master_dir = "master_opt"

The existing PESTPP-IES master directory - we need it so we can get the calibrated parameter values:

In [None]:
# directory holding the completed IES run; it seeds the opt working directory below
ies_d = "master_ies"
# fail early if the IES results are not available
assert os.path.exists(ies_d)

Define the working directory where the opt-modified control file will be written:

In [None]:
working_d = "model_and_pest_files_opt"
# start from a clean copy of the IES master directory every time this cell is run
if os.path.exists(working_d):
    shutil.rmtree(working_d)
shutil.copytree(ies_d, working_d)

Load the control file

In [None]:
pst = pyemu.Pst(os.path.join(working_d, "pest.pst"))
# sanity checks on the IES control file: some, but not all, observations carry non-zero weight
assert pst.nobs > pst.nnz_obs
assert pst.nnz_obs > 0

First we need to modify the parameter data section to identify which parameters will be adjusted during optimization - we call these decision variables:

In [None]:
# parameter data of the control file; later cells edit it via .loc
# NOTE(review): presumably a live reference, so those edits land in `pst` - confirm
par = pst.parameter_data

Get the posterior parameter ensemble and look for the "base" realization

In [None]:
# largest iteration number in the IES phi-actual results; used to fetch the posterior ensembles
post_iter = pst.ies.phiactual.iteration.max()

In [None]:
# parameter ensemble from the last IES iteration (the posterior)
pe = pst.ies.get("paren", post_iter)
pe

In [None]:
# if the ensemble carries a "base" realization, use its (rounded) values as the
# initial parameter values in the control file
if "base" in pe.index:
    print("using base realization")
    base_real = pe.loc["base", :].round(6)
    par.loc[pe.columns, "parval1"] = base_real.values

Find all parameters with "wel" in the name:

In [None]:
# all well parameters, with the stress period as an integer
is_wel_par = par.parnme.str.contains("wel")
wpar = par.loc[is_wel_par, :].copy()
wpar["kper"] = wpar["kper"].astype(int)

# first stress period that has a "pred" well parameter; everything from there on
# is a candidate decision variable
is_pred_name = wpar.parnme.str.contains("pred")
start_pred_kper = wpar.loc[is_pred_name, "kper"].min()
pred_wpar = wpar.loc[wpar.kper >= start_pred_kper, :]
assert pred_wpar.shape[0] > 0
pred_wpar.head()

Mark these well parameters as "none" transformed and modify their derivative calculation quantities to make sure we are using large enough perturbation increments:

In [None]:
# the prediction-period well parameters become the decision variables:
# untransformed, in their own group, bounded between 0.0 and 1.5
decvar_names = pred_wpar.parnme
for column, setting in (
    ("partrans", "none"),
    ("pargp", "decvar"),
    ("parubnd", 1.5),
    ("parlbnd", 0.0),
):
    par.loc[decvar_names, column] = setting

# bring the parameter group data in line with the edited pargp column, then give
# the decision-variable group an absolute perturbation increment of 0.2
pst.rectify_pgroups()
pst.parameter_groups.loc["decvar", "inctyp"] = "absolute"
pst.parameter_groups.loc["decvar", "derinc"] = 0.2
pst.pestpp_options["opt_dec_var_groups"] = "decvar"

Now modify the observation data section: set all obs to zero weight, then find constraints and set them accordingly

In [None]:
obs = pst.observation_data
# start with every observation switched off; constraints are re-enabled one by one
obs["weight"] = 0.0

# find the river sw-gw exchange difference observation and prefer it to be 0 so that
# historical and future are equal
is_swgw_diff = (
    obs.obsnme.str.contains("forecast")
    & obs.obsnme.str.contains("diff")
    & obs.obsnme.str.contains("riv-swgw")
)
fobs = obs.loc[is_swgw_diff, :]
assert fobs.shape[0] == 1

# simulated value of that difference in the base realization (kept for reference)
cal_diff = pst.ies.get("obsen", post_iter).loc["base", fobs.obsnme]

# turn it into a ">= 0" constraint
obs.loc[fobs.obsnme, "obgnme"] = "greater_than"
obs.loc[fobs.obsnme, "obsval"] = 0.0  # cal_diff * 0.5
obs.loc[fobs.obsnme, "weight"] = 1.0

Find the observation that holds the sum of future gw pumping - this will be the objective function that we want to maximize:

In [None]:
# the single forecast observation that totals prediction-period well pumping
is_pred_wel_total = (
    obs.obsnme.str.contains("forecast")
    & obs.obsnme.str.contains("pred")
    & obs.obsnme.str.contains("wel")
)
wobs = obs.loc[is_pred_wel_total, :]
assert wobs.shape[0] == 1
# the objective is left at zero weight, i.e. it is not also used as a constraint:
# obs.loc[wobs.obsnme,"weight"] = 1.0
# obs.loc[wobs.obsnme,"obgnme"] = "greater_than"
obj_name = wobs.obsnme.values[0]
obj_name

Set constraints at each gw observation location so that, during the future/optimization period, we aren't dewatering the upper or lower aquifer:

In [None]:
def _usecol_index(usecol, token):
    """Return the integer that follows the one-character prefix of the given "-"-separated token."""
    return int(usecol.split("-")[token][1:])


# groundwater level observations: names containing "wt" or "aq"
name_has_wt = obs.obsnme.str.contains("wt")
name_has_aq = obs.obsnme.str.contains("aq")
gwobs = obs.loc[name_has_wt | name_has_aq, :].copy()

# row / column indices are parsed from the second and third tokens of `usecol`
gwobs["i"] = gwobs.usecol.apply(_usecol_index, args=(1,))
gwobs["j"] = gwobs.usecol.apply(_usecol_index, args=(2,))

# layer whose bottom elevation is used as the constraint level
gwobs["k"] = -999
# use the botm of the layer above to keep saturated conditions
gwobs.loc[gwobs.obsnme.str.contains("aq"), "k"] = 1
gwobs.loc[gwobs.obsnme.str.contains("wt"), "k"] = 0

In [None]:
# load only the DIS package of the MODFLOW 6 simulation - just the layer bottoms are needed
sim = flopy.mf6.MFSimulation.load(sim_ws=working_d, load_only=["dis"])
# NOTE(review): get_model() without a name presumably returns the only model - confirm
gwf = sim.get_model()
# layer-bottom array, indexed below as botm[k, i, j]
botm = gwf.dis.botm.array

In [None]:
# margin added on top of the layer bottom
tol = 1.0

# keep only the groundwater observations dated after 2015
gwobs["datetime"] = pd.to_datetime(gwobs.datetime)
gwobs = gwobs.loc[gwobs.datetime.dt.year > 2015, :].copy()

# constraint level per observation: bottom of the selected layer at that cell, plus tol
min_level = gwobs.apply(lambda row: botm[row.k, row.i, row.j] + tol, axis=1)
obs.loc[gwobs.obsnme, "obsval"] = min_level
obs.loc[gwobs.obsnme, "obgnme"] = "greater_than"
obs.loc[gwobs.obsnme, "weight"] = 1.0

We also need to make sure that during each future stress period, we are producing at least 90% of the maximum historic production:

In [None]:
# budget observations of well outflow
is_pwell_out = obs.obsnme.str.contains("bud") & obs.obsnme.str.contains("pwell--out")
wfobs = obs.loc[is_pwell_out, :].copy()
wfobs["datetime"] = pd.to_datetime(wfobs.datetime)

# split at 2015: earlier years are treated as history, 2015 onwards as prediction
wf_year = wfobs.datetime.dt.year
hist_wfobs = wfobs.loc[wf_year < 2015, :]
pred_wfobs = wfobs.loc[wf_year >= 2015, :]

# largest historic value in the base realization of the posterior observation ensemble
post_oe = pst.ies.get("obsen", post_iter)
hist_max = post_oe.loc["base", hist_wfobs.obsnme].max()
hist_max

In [None]:
# inspect the prediction-period well-outflow observations that are turned into constraints next
pred_wfobs

In [None]:
# every prediction-period well-outflow observation must be >= 90% of the historic maximum
pred_wf_names = pred_wfobs.obsnme
obs.loc[pred_wf_names, "obgnme"] = "greater_than"
obs.loc[pred_wf_names, "obsval"] = hist_max * 0.9
obs.loc[pred_wf_names, "weight"] = 1.0

Set `noptmax` to 1 and identify the objective function

In [None]:
# a single iteration
pst.control_data.noptmax = 1
# the objective is the future-pumping sum observation found above, and it is maximized
pst.pestpp_options["opt_objective_function"] = obj_name
pst.pestpp_options["opt_direction"] = "max"

In [None]:
# overwrite the control file in the working directory with the opt-modified version (version 2 format)
pst.write(os.path.join(working_d, "pest.pst"), version=2)

In [None]:
# recreate an empty master directory so no results from a previous run are left behind
if os.path.exists(master_dir):
    shutil.rmtree(master_dir)
os.makedirs(master_dir)
# run PESTPP-OPT in parallel from the working directory; master_dir is passed both
# as the master directory and as the root under which the workers are placed
pyemu.os_utils.start_workers(
    working_d,
    "pestpp-opt",
    "pest.pst",
    num_workers=num_workers,
    worker_root=master_dir,
    master_dir=master_dir,
)

# post processing PESTPP-OPT

Check the initial and optimal objective function values:

In [None]:
# objective value reported by PESTPP-OPT: first row, first data column of the iobj file
iobj_file = os.path.join(master_dir, "pest.slp.iobj.csv")
iobj = pd.read_csv(iobj_file, index_col=0)
opt_phi_value = iobj.values[0][0]

# the same quantity as simulated by the base realization of the posterior ensemble
post_obs = pst.ies.get("obsen", post_iter)
org_phi_value = post_obs.loc["base", obj_name]
org_phi_value, opt_phi_value

So we are able to extract more gw but also make river sw-gw exchange have zero change into the future - nice!

Plot the optimal decision variable values:

In [None]:
# parameter values written by the PESTPP-OPT master run, restricted to the well parameters
par_file = os.path.join(master_dir, "pest.1.par")
opt_par_vals = pyemu.pst_utils.read_parfile(par_file).loc[wpar.parnme]
ax = opt_par_vals["parval1"].plot.bar()
ax.grid()

## investigating the response matrix

In [None]:
# load the binary Jacobian written by the master run as a DataFrame
# (it is indexed by observation name in the cells below)
rm = pyemu.Matrix.from_binary(os.path.join(master_dir, "pest.1.jcb")).to_dataframe()

In [None]:
# response-matrix row of the objective-function observation
rm.loc[obj_name, :]

In [None]:
# rows to show: every non-zero-weighted observation except the first one
constraint_names = pst.nnz_obs_names[1:]

fig, ax = plt.subplots(1, 1, figsize=(10, 100))
# log10 of the absolute sensitivities; entries that are exactly zero become -inf,
# so silence numpy's divide-by-zero RuntimeWarning rather than cluttering the output
with np.errstate(divide="ignore"):
    vals = np.log10(np.abs(rm.loc[constraint_names, :].values))
cb = ax.imshow(vals)
plt.colorbar(cb)
# label every column and every row of the image
ax.set_xticks(np.arange(vals.shape[1]))
ax.set_xticklabels(rm.columns.to_list(), rotation=90)
ax.set_yticks(np.arange(vals.shape[0]))
_ = ax.set_yticklabels(constraint_names)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
# ratio of |sensitivity of the first non-zero-weighted observation| to
# |sensitivity of the objective|, per response-matrix column
first_nnz_sens = np.abs(rm.loc[pst.nnz_obs_names[0], :].values)
obj_sens = np.abs(rm.loc[obj_name, :].values)
cf = first_nnz_sens / obj_sens
bar_pos = np.arange(cf.shape[0])
ax.bar(bar_pos, cf)
ax.set_xticks(bar_pos)
_ = ax.set_xticklabels(rm.columns, rotation=90)

# Reliability

In [None]:
# posterior observation ensemble from the last IES iteration
oe = pst.ies.get("obsen", post_iter)
# spread of the river sw-gw exchange difference observation across the ensemble
ax = oe.loc[:, fobs.obsnme].hist()
plt.tight_layout()

Tell pestpp-opt how to use the uncertainty. We can also take advantage of the linear assumption and reuse the previous results so we don't need to do any additional model runs (!)

In [None]:
# save the posterior observation ensemble next to the control file and register it
# with PESTPP-OPT as the observation stack
oe.to_csv(os.path.join(working_d, "obs_stack.csv"))
pst.pestpp_options["opt_obs_stack"] = "obs_stack.csv"
pst.pestpp_options["opt_recalc_chance_every"] = 999

# reuse the outputs of the previous PESTPP-OPT run: its Jacobian as the base Jacobian ...
shutil.copy2(
    os.path.join(master_dir, "pest.1.jcb"), os.path.join(working_d, "respmat.jcb")
)
pst.pestpp_options["base_jacobian"] = "respmat.jcb"
# ... and the matching residual file as the hot-start residuals
shutil.copy2(
    os.path.join(master_dir, "pest.1.jcb.rei"), os.path.join(working_d, "hotstart.rei")
)
pst.pestpp_options["hotstart_resfile"] = "hotstart.rei"
pst.pestpp_options["opt_skip_final"] = True
pst.pestpp_options["opt_risk"] = 0.85

par = pst.parameter_data
# NOTE(review): fixed parameters are switched to log-transformed here; the reason is not
# stated in this notebook - confirm the intent
par.loc[par.partrans == "fixed", "partrans"] = "log"
# request version 2 explicitly, as in the first write of this control file, so the
# format does not depend on pyemu's default
pst.write(os.path.join(working_d, "pest.pst"), version=2)

Rerun pestpp-opt:

In [None]:
# run PESTPP-OPT as a single process directly in the working directory
pyemu.os_utils.run("pestpp-opt pest.pst", cwd=working_d)

In [None]:
# objective value from the rerun: first row, first data column of the iobj file
iobj_file = os.path.join(working_d, "pest.slp.iobj.csv")
iobj = pd.read_csv(iobj_file, index_col=0)
opt_phi_value = iobj.values[0][0]

# the same quantity as simulated by the base realization of the posterior ensemble
post_obs = pst.ies.get("obsen", post_iter)
org_phi_value = post_obs.loc["base", obj_name]
org_phi_value, opt_phi_value

In [None]:
# parameter values written by the rerun, restricted to the well parameters
par_file = os.path.join(working_d, "pest.1.par")
opt_par_vals = pyemu.pst_utils.read_parfile(par_file).loc[wpar.parnme]
ax = opt_par_vals["parval1"].plot.bar()
ax.grid()