# Run PESTPP-OPT 

In this notebook, we will run pestpp-opt using the pestpp-ies results

In [None]:
%matplotlib inline
import os
import shutil
import numpy as np
import pandas as pd
import pyemu
import matplotlib.pyplot as plt
import helpers

In [None]:
# Call the project helper; the return value is bound to `_` so the cell echoes nothing.
# NOTE(review): presumably this draws the model domain map as a side effect - confirm in helpers.py.
_ = helpers.get_domain_map()

Set some important vars for this notebook

In [None]:
# number of parallel workers handed to pyemu.os_utils.start_workers() further down
num_workers = 10
# master directory for the pestpp-opt run; it is removed and recreated right before the run
master_d = "master_opt"

The existing master ies directory - needed so we can get the calibrated par values

Define the working directory where the opt-modified control file will be written

In [None]:
# template directory (left untouched) and the scratch copy that receives the opt-modified files
org_working_d = "model_and_pest_files"
working_d = "model_and_pest_files_opt"
# always start from a fresh copy of the template so a Restart & Run All is repeatable
if os.path.exists(working_d):
    shutil.rmtree(working_d)
shutil.copytree(org_working_d, working_d)

Load the control file

In [None]:
# load the copied control file; later cells edit this Pst object in memory and write it back
pst = pyemu.Pst(os.path.join(working_d, "pest.pst"))

First we need to modify the parameter data section to identify which parameters will be adjusted during optimization - we call these decision variables:

In [None]:
# handle to the control file's parameter data table; it is edited through .loc in later cells
par = pst.parameter_data

Select the well parameters (those with "wel" in the parameter name) - these are the candidate decision variables

In [None]:
# rows whose parameter name contains "wel"; .copy() so the helper columns added to wpar below
# do not alter the control file's parameter table
wpar = par.loc[par.parnme.str.contains("wel"), :].copy()

cast the `kper` to an "int", then make sure we are only adjusting wel pars that correspond to the future period

In [None]:
# Cast the stress period and the three index columns to int so they can be used for
# equality tests and array indexing later on.
# target column -> source column (idx0/idx1/idx2 are exposed as k/i/j)
for int_col, src_col in (("kper", "kper"), ("i", "idx1"), ("j", "idx2"), ("k", "idx0")):
    wpar[int_col] = wpar[src_col].astype(int)

Mark these well parameters as "none" transformed and modify their derivative calculation quantities to make sure we are using large enough perturbation increments

In [None]:
# Settings applied to every well parameter, in this order:
# untransformed, grouped as "decvar", bounded to [0.0, 1.5] and started at 0.0
# (i.e. the pred well rates are set to zero initially).
decvar_settings = {
    "partrans": "none",
    "pargp": "decvar",
    "parubnd": 1.5,
    "parlbnd": 0.0,
    "parval1": 0.0,
}
for column, value in decvar_settings.items():
    par.loc[wpar.parnme, column] = value

# make sure the new "decvar" group exists in the parameter group table before editing it
pst.rectify_pgroups()
# absolute perturbation increment of 0.2 for the derivative calculation
for column, value in (("derinc", 0.2), ("inctyp", "absolute")):
    pst.parameter_groups.loc["decvar", column] = value
# tell pestpp-opt which parameter group holds the decision variables
pst.pestpp_options["opt_dec_var_groups"] = "decvar"

Now modify the observation data section: set all obs to zero weight, then find constraints and set them accordingly

In [None]:
# handle to the observation data table; zero every weight first so that only the
# observations re-weighted in the following cells remain active
obs = pst.observation_data
obs["weight"] = 0.0

Find any observation names that have "forecast", "diff" and "riv-swgw" in the name.  There should be exactly 1 of these

In [None]:
# find the river sw-gw exchange difference observation and prefer it to be 0 so that historical and future are equal
# NOTE(review): the filter only requires "usecol:riv-swgw" in the name - confirm it matches the intended observation(s)
swgwobs = obs.loc[obs.obsnme.str.contains("usecol:riv-swgw"), :]
# number of matching observations
swgwobs.shape[0]

In [None]:
# activate the sw-gw observations (all weights were zeroed above)
obs.loc[swgwobs.obsnme, "weight"] = 1.0
# obs.loc[fobs.obsnme, "obsval"] = 0  # cal_diff * 0.5
# NOTE(review): the disabled line above refers to `fobs`, which is not defined in the cells above;
# as written, the existing obsval is kept unchanged
# move them into the "less_than" observation group, which is how the constraint type is given to pestpp-opt
obs.loc[swgwobs.obsnme, "obgnme"] = "less_than"

In [None]:
# observations whose name contains "sv-spring-obs"
springobs = obs.loc[obs.obsnme.str.contains("sv-spring-obs"), :]
# number of matching observations
springobs.shape[0]

In [None]:
# display the selected spring observation rows for inspection
springobs

In [None]:
# activate the spring observations (all weights were zeroed above)
obs.loc[springobs.obsnme, "weight"] = 1.0
# obs.loc[fobs.obsnme, "obsval"] = 0  # cal_diff * 0.5
# NOTE(review): the disabled line above refers to `fobs`, which is not defined in the cells above;
# as written, the existing obsval is kept unchanged
# move them into the "less_than" observation group, which is how the constraint type is given to pestpp-opt
obs.loc[springobs.obsnme, "obgnme"] = "less_than"

Find the sum of future gw pumping observation - this will be our objective function that we want to maximize.  The observation should have "forecast","pred" and "wel" in the name.  There should be only one observation.  Assign this observation to a variable called `obj_name`

In [None]:
# The objective is the one observation whose name contains all three tags below.
obsnme_text = obs.obsnme.str
objective_mask = (
    obsnme_text.contains("forecast")
    & obsnme_text.contains("pred")
    & obsnme_text.contains("wel")
)
wobs = obs.loc[objective_mask, :]
# exactly one match is required, otherwise the objective would be ambiguous
assert len(wobs) == 1
# the objective observation keeps its zero weight and its current group; it is only
# referenced by name through the "opt_objective_function" option later on
obj_name = wobs.obsnme.values[0]
obj_name

We also need to make sure that during each future stress period, we are meeting the long-term historic production rates.  
First find all observations with "bud" and "wel-0--out" in the name:

In [None]:
# Well-flux budget observations: names containing both "bud" and "wel-0--out".
wel_out_mask = obs.obsnme.str.contains("bud") & obs.obsnme.str.contains("wel-0--out")
# independent copy, so the datetime conversion below does not touch the control file's table
wfobs = obs.loc[wel_out_mask, :].copy()
wfobs.index.to_list()

Now cast the `datetime` column using `pd.to_datetime()`

In [None]:
# parse the datetime column so the .dt accessor can be used for the year split below
wfobs["datetime"] = pd.to_datetime(wfobs.datetime)

Now split these well-flux obs into historic and predictive/future by the year 2015

In [None]:
# observations dated before 2015 are treated as historic ...
hist_wfobs = wfobs.loc[wfobs.datetime.dt.year < 2015, :]
# ... and those dated 2015 or later as predictive/future
pred_wfobs = wfobs.loc[wfobs.datetime.dt.year >= 2015, :]
hist_wfobs

What was the historic maximum gw production rate (using the posterior ensemble base realization)?

In [None]:
# largest obsval among the historic well-flux observations, read from the control file's table
hist_max = obs.loc[hist_wfobs.obsnme, "obsval"].max()
hist_max

In [None]:
# index labels of the predictive/future well-flux observations
pred_wfobs.index.to_list()

for the future/predictive well flux observations, set the `weight` to 1.0, the obsval to 90% of the historic max (assuming some future water conservation) and the `obgnme` to "greater_than"

In [None]:
# Turn the well-flux observations into active "greater_than" constraints whose
# right-hand side is 90% of each observation's own original obsval.
obs.loc[wfobs.obsnme, "weight"] = 1.0
# Scale from the pristine copy held in `wfobs` (taken before any edits) rather than with an
# in-place `*= 0.9`, so that re-running this cell does not compound the 0.9 factor.
obs.loc[wfobs.obsnme, "obsval"] = wfobs.obsval.values * 0.9
obs.loc[wfobs.obsnme, "obgnme"] = "greater_than"
# NOTE(review): this applies to ALL well-flux observations (historic and future) and scales each
# observation's own value; the accompanying text describes only the future observations and
# 90% of `hist_max` - confirm which of the two is intended.

Set `noptmax` to 1 and identify the objective function via the "opt_objective_function" argument.  Set the "opt_direction" to "max", telling pestpp-opt to maximize the future groundwater production

In [None]:
# a single pestpp-opt iteration
pst.control_data.noptmax = 1
# the observation selected above is the objective function ...
pst.pestpp_options["opt_objective_function"] = obj_name
# ... and it is to be maximized
pst.pestpp_options["opt_direction"] = "max"

Save the control file

In [None]:
# overwrite the control file in the opt working dir, explicitly requesting control file version 2
pst.write(os.path.join(working_d, "pest.pst"), version=2)

Run pestpp-opt using the `pyemu.os_utils.start_workers()` function:

In [None]:
# start from an empty master directory so results from a previous run cannot be picked up
if os.path.exists(master_d):
    shutil.rmtree(master_d)
os.makedirs(master_d)
# run pestpp-opt on pest.pst with `num_workers` workers, using `working_d` as the template
# directory and `master_d` both as the master directory and as the root for the worker dirs
pyemu.os_utils.start_workers(
    working_d,
    "pestpp-opt",
    "pest.pst",
    num_workers=num_workers,
    worker_root=master_d,
    master_dir=master_d,
)

# post processing PESTPP-OPT

check the final and initial objective function value:

In [None]:
# Read the objective function record written by the run in the master directory and
# keep the value in its first row / first data column (the first CSV column is the index).
iobj_csv = os.path.join(master_d, "pest.slp.iobj.csv")
iobj_table = pd.read_csv(iobj_csv, index_col=0)
opt_phi_value = iobj_table.values[0][0]
hist_max, opt_phi_value

In [None]:
# parameter values written by pestpp-opt (pest.1.par) in the master directory
opt_par_vals = pyemu.pst_utils.read_parfile(os.path.join(master_d, "pest.1.par"))

In [None]:
# keep only the rows for the well parameters (the decision variables), in wpar order
opt_par_vals = opt_par_vals.loc[wpar.parnme, :]
opt_par_vals

In [None]:
import flopy

In [None]:
# load the flopy MF6 simulation from the files in the master directory
sim = flopy.mf6.MFSimulation.load(sim_ws=master_d)

In [None]:
# no model name is given, so this relies on get_model()'s default selection
# NOTE(review): presumably the simulation holds a single groundwater flow model - confirm
gwf = sim.get_model()

In [None]:
# River package entries for the first stress period.
riv = gwf.riv.stress_period_data.array[0]
# The spring marker is placed at the smallest second cellid index and the largest third
# cellid index found among the river cells.
# NOTE(review): assumes cellid is ordered (layer, row, column) - confirm.
springi = min(cellid[1] for cellid in riv["cellid"])
springj = max(cellid[2] for cellid in riv["cellid"])
springi, springj

In [None]:
import xarray as xa

In [None]:
# netCDF file with the synthetic-valley "truth" data, addressed relative to the notebook directory
nc_path = os.path.join("..", "synthetic-valley", "data", "synthetic_valley_truth.nc")
nc_ds = xa.open_dataset(nc_path)
# lake location variable as a plain numpy array; plot() reads this module-level name
lake_location = nc_ds["lake_location"].to_numpy()

In [None]:
def plot(gwf):
    """Build the two-panel base map that every animation frame is drawn on.

    The left panel shows the lake footprint (module-level ``lake_location``, zeros
    masked), the model grid and the river boundary cells. The right panel shows the
    model grid only. Titles and data overlays are added by the caller.

    Parameters
    ----------
    gwf
        flopy model handed to ``flopy.plot.PlotMapView``; its ``modelgrid.extent``
        is used as the map extent of both panels.

    Returns
    -------
    tuple
        ``(fig, axs)`` - the matplotlib figure and its two axes.
    """
    with flopy.plot.styles.USGSMap():
        fig, axs = plt.subplots(1, 2, figsize=(8, 5), sharey=True)

        def _map_view(ax):
            # fix the axis window first, then attach a map view for this panel
            ax.set_xlim(0, 12500)
            ax.set_ylim(0, 20000)
            return flopy.plot.PlotMapView(model=gwf, ax=ax, extent=gwf.modelgrid.extent)

        # left panel: lake, grid and river
        left_map = _map_view(axs[0])
        left_map.plot_array(lake_location, cmap="Blues_r", masked_values=[0])
        left_map.plot_grid(lw=0.5, color="0.5")
        left_map.plot_bc("riv", label="river")

        # right panel: grid only
        right_map = _map_view(axs[1])
        right_map.plot_grid(lw=0.5, color="0.5")

        return fig, axs

In [None]:
# Render one PNG frame per stress period showing the optimal well multipliers
# (minus 1) per layer, for assembly into an animation by the next cell.
X = gwf.modelgrid.xcellcenters
Y = gwf.modelgrid.ycellcenters

kpers = wpar.kper.unique()
kpers.sort()
ks = wpar.k.unique()
ks.sort()

# One symmetric colour range shared by every panel of every frame, so that the diverging
# "bwr" map is centred on zero (multiplier of 1) and colours are comparable between frames.
abs_dev = np.abs(opt_par_vals.loc[wpar.parnme, "parval1"].values - 1)
vlim = abs_dev.max() if abs_dev.size > 0 else 1.0
if not vlim > 0:  # also catches NaN; fall back to a unit range
    vlim = 1.0

# `ifig` is incremented by hand (not via enumerate) because the next cell relies on it
# ending up equal to the number of frames written.
ifig = 0
for kper in kpers:
    kpar = wpar.loc[wpar.kper == kper, :]

    fig, axs = plot(gwf)
    # one panel per layer; zip() stops at the shorter of the two sequences
    for k, ax in zip(ks, axs):
        kkpar = kpar.loc[kpar.k == k, :]
        # -999 marks cells without a well parameter; they are blanked to NaN below
        arr = np.zeros((gwf.dis.nrow.data, gwf.dis.ncol.data)) - 999
        for i, j, val in zip(
            kkpar.i.values,
            kkpar.j.values,
            opt_par_vals.loc[kkpar.parnme, "parval1"].values,
        ):
            arr[i, j] = val
        arr[arr == -999] = np.nan
        # shift so that a value of 1 plots as zero (the centre of the colour map)
        arr -= 1
        ax.pcolormesh(X, Y, arr, zorder=1000, cmap="bwr", vmin=-vlim, vmax=vlim)

    axs[0].set_title("Water Table Aquifer Year {0}".format(ifig + 1), loc="left")
    axs[0].set_facecolor("0.9")
    axs[1].set_title("Semi-Confined Aquifer Year {0}".format(ifig + 1), loc="left")
    axs[1].set_facecolor("0.9")
    # mark the spring location found from the river cells
    axs[1].scatter(X[springi, springj], Y[springi, springj], marker="^", s=80, c="m")
    # save through the figure handle rather than the implicit pyplot "current figure"
    fig.savefig(os.path.join(master_d, "fig{0:04d}.png".format(ifig)), dpi=400)
    # close each figure so memory does not grow with the number of frames
    plt.close(fig)
    ifig += 1

In [None]:
# Assemble the saved frames into an animated GIF with ffmpeg (run inside master_d).
fps = 5
# Step 1: build a 256-colour palette from the last frame written (index ifig - 1).
# "-y" lets ffmpeg overwrite an existing palette.png, so re-running this cell does not fail;
# the second command already used "-y".
pyemu.os_utils.run(
    "ffmpeg -y -i fig{0:04d}.png -vf palettegen=256 palette.png".format(ifig - 1),
    cwd=master_d,
)
# Step 2: read the numbered frames at `fps` frames per second, scale them to 720 px wide and
# apply the palette to produce temp.gif.
pyemu.os_utils.run(
    'ffmpeg -r {0} -y -s 1920X1080 -i fig%04d.png -i palette.png -filter_complex "scale=720:-1:flags=lanczos[x];[x][1:v]paletteuse" temp.gif'.format(
        fps
    ),
    cwd=master_d,
)

## investigating the response matrix

Load the response matrix ("pest.1.jcb") using `pyemu.Matrix.from_binary()`

In [None]:
# response matrix written by the first pestpp-opt run, converted to a DataFrame;
# it is indexed below by observation name (rows) and its columns are used as bar labels
rm = pyemu.Matrix.from_binary(os.path.join(master_d, "pest.1.jcb")).to_dataframe()

In [None]:
# Bar chart of |constraint response| / |objective response| for every column of the
# response matrix.
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
# NOTE(review): the constraint row is simply the first non-zero-weighted observation in the
# control file; confirm that this is the sw-gw difference observation named in the title.
constraint_response = np.abs(rm.loc[pst.nnz_obs_names[0], :].values)
objective_response = np.abs(rm.loc[obj_name, :].values)
cf = constraint_response / objective_response
bar_positions = np.arange(cf.shape[0])
ax.bar(bar_positions, cf)
ax.set_xticks(bar_positions)
_ = ax.set_xticklabels(rm.columns, rotation=90)
ax.set_title(" long-term difference sw-gw capture fraction")

# Reliability

plot the posterior histogram of the river swgw flux difference (the obs we used as a constraint)

In [None]:
# oe = pst.ies.get("obsen", post_iter)
# ax = oe.loc[:, fobs.obsnme].hist()
# plt.tight_layout()

Tell pestpp-opt how to use the uncertainty. We can also take advantage of the linear assumption and reuse the previous results so we don't need to do any additional model runs (!)

In [None]:
# Configure a risk-based rerun of pestpp-opt that reuses the results of the first run.

# save the posterior obs ensemble in the opt working dir
# NOTE(review): the two lines below are disabled (`oe` is never created in this notebook),
# so no observation stack is passed to pestpp-opt.
# oe.to_csv(os.path.join(working_d, "obs_stack.csv"))
# pst.pestpp_options["opt_obs_stack"] = "obs_stack.csv"
# dont try to update the chance estimates
pst.pestpp_options["opt_recalc_chance_every"] = 999

# reuse the response matrix
shutil.copy2(
    os.path.join(master_d, "pest.1.jcb"), os.path.join(working_d, "respmat.jcb")
)
pst.pestpp_options["base_jacobian"] = "respmat.jcb"

# reuse the initial residuals
shutil.copy2(
    os.path.join(master_d, "pest.1.jcb.rei"), os.path.join(working_d, "hotstart.rei")
)
pst.pestpp_options["hotstart_resfile"] = "hotstart.rei"

# dont do a final model run
pst.pestpp_options["opt_skip_final"] = True

# look for a 60% reliable solution
pst.pestpp_options["opt_risk"] = 0.6

# write with the same explicit control file version as the first write of this notebook
# (the redundant rebinding of `par` that used to sit here was dropped; `par` already
# refers to pst.parameter_data)
pst.write(os.path.join(working_d, "pest.pst"), version=2)

Rerun pestpp-opt :

In [None]:
# run pestpp-opt directly inside the working dir (no workers are started here);
# its outputs therefore land in working_d rather than master_d
pyemu.os_utils.run("pestpp-opt pest.pst", cwd=working_d)

Visualize these results the same way we did earlier:

In [None]:
# Objective function value of the risk-based rerun: first row / first data column of the
# record written in the working dir (the first CSV column is the index).
reliable_iobj_csv = os.path.join(working_d, "pest.slp.iobj.csv")
reliable_iobj_table = pd.read_csv(reliable_iobj_csv, index_col=0)
reliable_phi_value = reliable_iobj_table.values[0][0]
hist_max, opt_phi_value, reliable_phi_value

In [None]:
# Parameter values from the risk-based rerun, restricted to the well parameters.
# Note that this rebinds `opt_par_vals`, replacing the values of the first run.
reliable_par_file = os.path.join(working_d, "pest.1.par")
opt_par_vals = pyemu.pst_utils.read_parfile(reliable_par_file).loc[wpar.parnme]
# one bar per well parameter
ax = opt_par_vals["parval1"].plot(kind="bar", figsize=(10, 10))
ax.grid()

Have a play at optimizing the optimization hyper parameters:

In [None]:
# how much water conservation and usage efficiency we expect in the future:
# the future well-flux constraints are set to hist_max times a factor (1 = no reduction)
obs.loc[pred_wfobs.obsnme, "obsval"] = hist_max * 1
# how reliable the optimal solution is
pst.pestpp_options["opt_risk"] = 0.8

In [None]:
# write the modified control file with the same explicit control file version as the
# first write of this notebook
pst.write(os.path.join(working_d, "pest.pst"), version=2)

In [None]:
# rerun pestpp-opt inside the working dir with the modified constraint values and risk setting
pyemu.os_utils.run("pestpp-opt pest.pst", cwd=working_d)