# Build the pest interface and generate the Prior

In [None]:
%matplotlib inline
import os
import shutil
import flopy
import pandas as pd
import pyemu
import helpers

In [None]:
# Choose which set of "safe" (untouched) model files to build the interface from.
# The monthly variants below are left disabled because they are slow to run:
# safe_org_d = os.path.join("..", "models", "synthetic-valley-working-monthly")
# safe_org_d = os.path.join("..", "models", "synthetic-valley-working_advanced-monthly")


safe_org_d = os.path.join("..", "models", "synthetic-valley-working-annual")
# Alternative annual variant:
# safe_org_d = os.path.join("..", "models", "synthetic-valley-working-advanced-annual")
# Fail early if the selected model directory is missing.
assert os.path.exists(safe_org_d)

Make a copy of the safe set of model files and run mf6 in that directory

In [None]:
# Scratch workspace for the model copy; remove any leftovers from a previous run
# so the notebook always starts from a clean directory.
tmp_d = "temp"
if os.path.exists(tmp_d):
    shutil.rmtree(tmp_d)

In [None]:
# Load the simulation from the safe directory, then point it at the scratch
# workspace so the original files are not written to.
sim = flopy.mf6.MFSimulation.load(sim_ws=safe_org_d)
sim.set_sim_path(tmp_d)
gwf = sim.get_model()
# Write model input data to external files in the workspace root; later cells
# look up files there by name (e.g. names starting with "sv.npf_k_layer").
gwf.set_all_data_external(external_data_folder=".")
sim.write_simulation()

In [None]:
# Run the freshly written model and stop here if it did not terminate normally,
# rather than building the interface on top of a failed run.
success, buff = sim.run_simulation()
assert success, "mf6 did not terminate normally in '{0}'".format(tmp_d)

In [None]:
# Swap the simulation's existing IMS solver package for one configured with
# the settings collected below.
sim.remove_package("ims")
ims_settings = {
    "print_option": "summary",
    "complexity": "complex",
    "under_relaxation": None,
    "linear_acceleration": "bicgstab",
    "outer_maximum": 500,
    "inner_maximum": 100,
    "outer_dvclose": 1e-3,
    "inner_dvclose": 1e-3,
}
ims = flopy.mf6.ModflowIms(sim, **ims_settings)

In [None]:
# Set the simulation-level "continue" flag.
# NOTE(review): presumably this lets mf6 keep going past solver non-convergence,
# which matters once parameters are perturbed - confirm against the mf6 docs.
sim.continue_ = True

In [None]:
# Re-write the input files with the new solver settings and re-run, checking
# that the run succeeded before its outputs are used to build the interface.
sim.write_simulation()
success, buff = sim.run_simulation()
assert success, "mf6 did not terminate normally in '{0}'".format(tmp_d)

Make a `PstFrom` instance

In [None]:
# Options for the interface builder: the grid and start date come from the
# loaded groundwater model.
pst_from_options = dict(
    remove_existing=True,
    spatial_reference=gwf.modelgrid,
    zero_based=False,
    start_datetime=gwf.start_datetime,
    echo=False,
    chunk_len=1000000,
)
# Build the interface from the scratch workspace into "model_and_pest_files".
pf = pyemu.utils.PstFrom(tmp_d, "model_and_pest_files", **pst_from_options)

We are using a model post-processing function to clean up and process the csv output files.  We need to tell `PstFrom` to run that function after mf6 runs:

In [None]:
# Run the post-processor once now in the new interface directory, so the
# processed csv files that the following cells read already exist there.
helpers.process_csv_files(model_ws=pf.new_d)
# Register the same function with the interface as a post-model command
# (is_pre_cmd=False), so it is run after the model in the forward run.
pf.add_py_function("helpers.py", "process_csv_files()", is_pre_cmd=False)

Tell `PstFrom` to run mf6 as the "model"

In [None]:
# Add "mf6" to the list of system commands that make up the model run.
pf.mod_sys_cmds.append("mf6")

Add the first set of model outputs as "observations" in the pest interface: "swgw-longterm-means.csv":

In [None]:
# Register every row of the long-term-means file as an observation, indexed by
# the "quantity" column and placed in the "forecasts" group.
# (The file does not need to be read with pandas first: the result of such a
# read was previously discarded before use.)
df = pf.add_observations(
    "swgw-longterm-means.csv",
    index_cols="quantity",
    prefix="forecasts",
    obsgp="forecasts",
    ofile_sep=",",
)
print(df)

Now let's gather up all the output time-series csv files we want to have as observations:

In [None]:
# File-name prefixes of the time-series csv outputs to turn into observations.
# Files are collected prefix by prefix, in this order.
obs_prefixes = ("sv.gwf", "sv.lake", "sv.riv", "sv.sfr", "sv-budget")
obs_csv_files = [
    f
    for obs_prefix in obs_prefixes
    for f in os.listdir(pf.new_d)
    if f.startswith(obs_prefix) and f.endswith(".csv")
]
obs_csv_files

Loop over them and add each one to the interface

In [None]:
for obs_csv_file in obs_csv_files:
    print(obs_csv_file)
    # Read the file only to discover its data columns (the first column is
    # taken as the index and is therefore excluded from use_cols).
    csv_path = os.path.join(pf.new_d, obs_csv_file)
    df = pd.read_csv(csv_path, index_col=0)
    data_cols = df.columns.to_list()
    # Observation-name prefix is the file name with dots swapped for dashes.
    prefix = "-".join(obs_csv_file.split("."))
    odf = pf.add_observations(
        obs_csv_file,
        index_cols="datetime",
        use_cols=data_cols,
        prefix=prefix,
        ofile_sep=",",
    )
    print(odf)

Now some parameters.  Start with hk - the ole classic:

In [None]:
# Collect the external HK array files and check there is exactly one per layer.
k_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.npf_k_layer")]
assert len(k_files) == gwf.dis.nlay.data

In [None]:
# Put the files in name order so positional slices below select by layer.
k_files = sorted(k_files)
k_files

We need to define some spatial correlation functions/information for the pilot points (for both interpolation from pilot points to the grid and also for the Prior covariance).  We will use a different correlation function for each property type:

In [None]:
# One exponential variogram + geostruct per property type; only the `a`
# argument and the transform differ between them.
# HK: a=10000, log transform
pp_v_k = pyemu.geostats.ExpVario(contribution=1.0, a=10000)
pp_geostruct_k = pyemu.geostats.GeoStruct(variograms=pp_v_k, transform="log")
# K33 (vertical K): a=15000, log transform
pp_v_k33 = pyemu.geostats.ExpVario(contribution=1.0, a=15000)
pp_geostruct_k33 = pyemu.geostats.GeoStruct(variograms=pp_v_k33, transform="log")
# SS: a=5000, log transform
pp_v_ss = pyemu.geostats.ExpVario(contribution=1.0, a=5000)
pp_geostruct_ss = pyemu.geostats.GeoStruct(variograms=pp_v_ss, transform="log")
# SY: a=2000, no transform (matches transform="none" on the sy parameters)
pp_v_sy = pyemu.geostats.ExpVario(contribution=1.0, a=2000)
pp_geostruct_sy = pyemu.geostats.GeoStruct(variograms=pp_v_sy, transform="none")

We will treat HK in layers 1 and 2 as the same quantity - they will share pilot point multiplier parameters:

In [None]:
# Pilot-point parameters applied to the first two HK files together, so both
# layers share one parameter set; bounds are 0.1 to 10.
# NOTE(review): pp_space=3 presumably means one pilot point every 3 cells -
# confirm against the pyemu docs.
df = pf.add_parameters(
    k_files[:2],
    par_type="pilotpoints",
    pp_options={"pp_space": 3},
    lower_bound=0.1,
    upper_bound=10.0,
    geostruct=pp_geostruct_k,
    par_name_base="hk-pp-wt",
    pargp="hk-pp-wt",
)

In [None]:
# Peek at the first rows of what add_parameters returned for the pilot points.
df.head()

Since the pilot points are designed to accommodate spatial heterogeneity, let's also include a layer-constant parameter to help sample a wider range of HK values:

In [None]:
# A single constant-type parameter over the same two HK files, with the same
# 0.1 to 10 bounds as the pilot points above.
df = pf.add_parameters(
    k_files[:2],
    par_type="constant",
    lower_bound=0.1,
    upper_bound=10.0,
    par_name_base="hk-cn-wt",
    pargp="hk-cn-wt",
)
df

Do the same for HK in layer 3 (this cell is currently commented out, so no HK parameters are added for layer 3) and for HK in layers 4 and 5 together:

In [None]:
# DISABLED: HK pilot-point and constant parameters for the third HK file
# (k_files[2]). Left commented out, so nothing in this cell is executed.
# df = pf.add_parameters(
#     k_files[2],
#     par_type="pilotpoints",
#     pp_options={"pp_space": 3},
#     lower_bound=0.1,
#     upper_bound=10.0,
#     geostruct=pp_geostruct_k,
#     par_name_base="hk-pp-conf",
#     pargp="hk-pp-conf",
# )
# df = pf.add_parameters(
#     k_files[2],
#     par_type="constant",
#     lower_bound=0.10,
#     upper_bound=1.0,
#     par_name_base="hk-cn-conf",
#     pargp="hk-cn-conf",
# )

In [None]:
# Pilot-point parameters shared by the HK files from the fourth one onward.
df = pf.add_parameters(
    k_files[3:],
    par_type="pilotpoints",
    pp_options={"pp_space": 3},
    lower_bound=0.1,
    upper_bound=10.0,
    geostruct=pp_geostruct_k,
    par_name_base="hk-pp-aq",
    pargp="hk-pp-aq",
)
# Constant-type parameter over the same files.
# NOTE(review): the upper bound here is 1.0, whereas the "hk-cn-wt" constant
# uses 10.0 - confirm this asymmetric range (0.1 to 1.0) is intentional.
df = pf.add_parameters(
    k_files[3:],
    par_type="constant",
    lower_bound=0.10,
    upper_bound=1.0,
    par_name_base="hk-cn-aq",
    pargp="hk-cn-aq",
)

To let us see the actual HK array that mf6 sees, let's also add that array as a set of observations:

In [None]:
# Track the first and last HK arrays as observations.
for k_file in (k_files[0], k_files[-1]):
    print(k_file)
    # Group/prefix tag: the second dot-separated token of the file name,
    # with underscores swapped for dashes.
    k_tag = k_file.split(".")[1].replace("_", "-")
    pf.add_observations(k_file, obsgp=k_tag, prefix=k_tag)

Set up a similar scheme of parameters for VK:

In [None]:
# Collect the external K33 (vertical K) array files.
k33_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.npf_k33_layer")]
# Check the K33 list itself (not the HK list) has one file per layer, since
# the cells below index k33_files by position.
assert len(k33_files) == gwf.dis.nlay.data
# Name order, so positional indexing selects by layer.
k33_files.sort()

# df = pf.add_parameters(
#     k33_files[:2],
#     par_type="pilotpoints",
#     pp_options={"pp_space": 3},
#     lower_bound=0.1,
#     upper_bound=10.0,
#     geostruct=pp_geostruct_k33,
#     par_name_base="k33-pp-wt",
#     pargp="k33-pp-wt",
# )
# df = pf.add_parameters(
#     k33_files[:2],
#     par_type="constant",
#     lower_bound=0.1,
#     upper_bound=10.0,
#     par_name_base="k33-cn-wt",
#     pargp="k33-cn-wt",
# )

# pf.add_observations(
#     k33_files[0],
#     obsgp=k33_files[0].split(".")[1].replace("_", "-"),
#     prefix=k33_files[0].split(".")[1].replace("_", "-"),
# )

# K33 is only parameterized for the third K33 file (k33_files[2]); the
# equivalent blocks for the other layers in this cell are commented out.
# Pilot points use bounds of 0.01 to 100.
df = pf.add_parameters(
    k33_files[2],
    par_type="pilotpoints",
    pp_options={"pp_space": 3},
    lower_bound=0.01,
    upper_bound=100.0,
    geostruct=pp_geostruct_k33,
    par_name_base="k33-pp-conf",
    pargp="k33-pp-conf",
)
# Constant-type parameter on the same file, with narrower bounds of 0.1 to 10.
df = pf.add_parameters(
    k33_files[2],
    par_type="constant",
    lower_bound=0.1,
    upper_bound=10.0,
    par_name_base="k33-cn-conf",
    pargp="k33-cn-conf",
)

# Also track that K33 array as observations; the group/prefix is the second
# dot-separated token of the file name with underscores swapped for dashes.
pf.add_observations(
    k33_files[2],
    obsgp=k33_files[2].split(".")[1].replace("_", "-"),
    prefix=k33_files[2].split(".")[1].replace("_", "-"),
)


# df = pf.add_parameters(
#     k33_files[3:],
#     par_type="pilotpoints",
#     pp_options={"pp_space": 3},
#     lower_bound=0.1,
#     upper_bound=10.0,
#     geostruct=pp_geostruct_k33,
#     par_name_base="k33-pp-aq",
#     pargp="k33-pp-aq",
# )
# df = pf.add_parameters(
#     k33_files[3:],
#     par_type="constant",
#     lower_bound=0.1,
#     upper_bound=10.0,
#     par_name_base="k33-cn-aq",
#     pargp="k33-cn-aq",
# )

# pf.add_observations(
#     k33_files[-1],
#     obsgp=k33_files[-1].split(".")[1].replace("_", "-"),
#     prefix=k33_files[-1].split(".")[1].replace("_", "-"),
# )

And SS and sy (in layer 1 only):

In [None]:
# Collect the external SS array files in name order. Sorting matters because
# os.listdir returns entries in arbitrary order and the following cell selects
# layers by position (ss_files[:2], ss_files[2], ss_files[3:]).
ss_files = sorted(f for f in os.listdir(pf.new_d) if f.startswith("sv.sto_ss"))
assert len(ss_files) == gwf.dis.nlay.data

In [None]:
# SS parameter zones: (files in the zone, tag used in parameter names/groups).
ss_zones = [
    (ss_files[:2], "wt"),
    (ss_files[2], "conf"),
    (ss_files[3:], "aq"),
]
for zone_files, zone_tag in ss_zones:
    # pilot-point parameters for the zone...
    pp_name = "ss-pp-" + zone_tag
    df = pf.add_parameters(
        zone_files,
        par_type="pilotpoints",
        pp_options={"pp_space": 3},
        lower_bound=0.1,
        upper_bound=10.0,
        geostruct=pp_geostruct_ss,
        par_name_base=pp_name,
        pargp=pp_name,
    )
    # ...followed by a constant-type parameter over the same files
    cn_name = "ss-cn-" + zone_tag
    df = pf.add_parameters(
        zone_files,
        par_type="constant",
        lower_bound=0.1,
        upper_bound=10.0,
        par_name_base=cn_name,
        pargp=cn_name,
    )

# Track one SS array per zone as observations; the group/prefix tag is the
# second dot-separated token of the file name with "_" swapped for "-".
for ss_file in (ss_files[0], ss_files[2], ss_files[-1]):
    ss_tag = ss_file.split(".")[1].replace("_", "-")
    ss_obs_df = pf.add_observations(ss_file, obsgp=ss_tag, prefix=ss_tag)
# show the result of the final add_observations call as the cell output
ss_obs_df

In [None]:
# External SY array files in name order, one expected per layer.
sy_files = sorted(f for f in os.listdir(pf.new_d) if f.startswith("sv.sto_sy"))
assert len(sy_files) == gwf.dis.nlay.data
# Only the first file is parameterized; make sure it really is layer 1.
sy_file = sy_files[0]
assert "layer1" in sy_file

In [None]:
# Untransformed pilot-point parameters for layer-1 SY, bounded 0.6 to 1.4.
# NOTE(review): ult_ubound=1.0 presumably caps the resulting SY values at 1.0
# after the multipliers are applied - confirm against the pyemu docs.
df = pf.add_parameters(
    sy_file,
    par_type="pilotpoints",
    pp_options={"pp_space": 3},
    lower_bound=0.6,
    upper_bound=1.4,
    geostruct=pp_geostruct_sy,
    par_name_base="sy-pp-wt",
    pargp="sy-pp-wt",
    ult_ubound=1.0,
    transform="none",
)
# Untransformed constant-type parameter on the same file, with a tighter
# 0.9 to 1.1 range.
df = pf.add_parameters(
    sy_file,
    par_type="constant",
    lower_bound=0.9,
    upper_bound=1.1,
    par_name_base="sy-cn-wt",
    pargp="sy-cn-wt",
    transform="none",
)
# Track the SY array as observations, tagged by the second dot-separated token
# of the file name with underscores swapped for dashes.
pf.add_observations(
    sy_file,
    obsgp=sy_file.split(".")[1].replace("_", "-"),
    prefix=sy_file.split(".")[1].replace("_", "-"),
)

Set up some parameters for the pumping wells - we aren't going to adjust these (don't we all have perfect historic water use data?!), but we will use them as decision variables later on...

In [None]:
# External stress-period files of the prediction well package; at least one
# must exist.
predwel_files = [
    f
    for f in os.listdir(pf.new_d)
    if f.startswith("sv.prediction.well_stress_period_data_")
]
assert len(predwel_files) > 0

Since the prediction well only has information for the predictive period, we can use its stress period/kper information for other packages:

In [None]:
# Find the earliest (zero-based) stress period that has prediction-well data.
# Later cells compare other packages' periods against kper_start_pred to
# separate the historic period from the forecast period, so it must actually
# be updated inside the loop.
kper_start_pred = 1000000  # sentinel: larger than any real stress period index
for pwel_file in predwel_files:
    # The third dot-separated token of the file name ends in "_<period>",
    # where <period> is one-based; convert it to a zero-based kper.
    kper = int(pwel_file.split(".")[2].split("_")[-1]) - 1
    kper_start_pred = min(kper_start_pred, kper)
    # One untransformed grid-type multiplier parameter per row of the file,
    # starting at 1.0 and bounded 0 to 3.
    pf.add_parameters(
        pwel_file,
        par_type="grid",
        index_cols=[0, 1, 2],
        use_cols=[3],
        upper_bound=3.0,
        lower_bound=0.0,
        pargp="predwel_kper:{0}".format(kper),
        par_name_base="predwel_kper:{0}".format(kper),
        transform="none",
        initial_value=1.0,
        par_style="m",
    )

Now find any remaining wel/maw files:

In [None]:
# Prefer MAW period-data files; fall back to the "pwell" WEL files when the
# model has no MAW files. ismaw records which kind was found.
wel_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.maw_perioddata")]
ismaw = len(wel_files) > 0
if not ismaw:
    wel_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.pwell.wel_")]
assert len(wel_files) > 0

And add parameters for them:

In [None]:
# Settings common to every pumping-rate multiplier parameter.
welrate_kwargs = dict(
    par_type="grid",
    par_style="m",
    mfile_skip=2,
    mfile_fmt="free",
    upper_bound=3.0,
    lower_bound=0.0,
    transform="none",
)
# MAW and WEL files differ in which dot-separated token of the file name
# carries the period number and in which columns index/hold the rate.
if ismaw:
    kper_token, wel_index_cols, wel_use_cols = 1, [0], [2]
else:
    kper_token, wel_index_cols, wel_use_cols = 2, [0, 1, 2], [3]

for wel_file in wel_files:
    # one-based period number in the file name -> zero-based kper
    kper = int(wel_file.split(".")[kper_token].split("_")[-1]) - 1
    # skip periods before the start of the prediction period
    if kper < kper_start_pred:
        continue
    if ismaw:
        # the MAW branch additionally skips the first period and echoes the file
        if kper == 0:
            continue
        print(wel_file)
    welrate_name = "welrate_kper:{0}".format(kper)
    pf.add_parameters(
        wel_file,
        par_name_base=welrate_name,
        pargp=welrate_name,
        index_cols=list(wel_index_cols),
        use_cols=list(wel_use_cols),
        **welrate_kwargs,
    )

Add some recharge parameters for the base model - this is to try to account for the uncertainty that has been introduced through simplification...if we are using uzf, then add some small uncertainties for precip/infilt:

In [None]:
rech_files = [
    f for f in os.listdir(pf.new_d) if f.startswith("sv.rch_stress_period_data_")
]
if len(rech_files) > 0:
    # Recharge package present: one constant-type parameter per stress period,
    # bounded 0.6 to 1.4.
    assert len(rech_files) == sim.tdis.nper.data
    rech_files.sort()
    for rech_file in rech_files:
        # one-based period number in the file name -> zero-based kper
        kper = int(rech_file.split(".")[1].split("_")[-1]) - 1
        if kper >= kper_start_pred:  # dont add forecast parameters
            continue
        rech_name = "rech-tcn_kper:{0}".format(kper)
        df = pf.add_parameters(
            rech_file,
            par_type="constant",
            lower_bound=0.6,
            upper_bound=1.4,
            par_name_base=rech_name,
            pargp=rech_name,
            transform=None,
            index_cols=[0, 1, 2],
            use_cols=[3],
            mfile_skip=0,
        )
else:
    # No recharge files: the model must then be using UZF instead.
    uzf_pak_file = "sv.uzf_packagedata.txt"
    if not os.path.exists(os.path.join(pf.new_d, uzf_pak_file)):
        raise Exception("didnt find any rech files or uzg package data file")
    uzf_files = [f for f in os.listdir(pf.new_d) if f.startswith("sv.uzf_period")]
    uzf_files.sort()
    print(uzf_files)
    assert len(uzf_files) == sim.tdis.nper.data
    for uzf_file in uzf_files:
        # one-based period number in the file name -> zero-based kper
        kper = int(uzf_file.split(".")[1].split("_")[-1]) - 1
        if kper >= kper_start_pred:  # dont add forecast parameters
            continue
        # tighter 0.8 to 1.2 range than the recharge case
        uzf_name = "uzf-tcn_kper:{0}".format(kper)
        df = pf.add_parameters(
            uzf_file,
            par_type="constant",
            lower_bound=0.8,
            upper_bound=1.2,
            par_name_base=uzf_name,
            pargp=uzf_name,
            transform=None,
            index_cols=[0],
            use_cols=[1],
            mfile_skip=0,
        )

Now build the interface and the control file:

In [None]:
# Assemble the control file from everything registered on `pf` above.
pf.build_pst(filename="pest.pst")

Check the `obsval` quantities in the "* observation data" section - what are those numbers:

In [None]:
# Look at the observation values stored in the control file.
obs = pf.pst.observation_data
obs.obsval

So if the `obsval` values are all the existing model output values, then if we run the model again in exactly the same way, we should have a phi of zero - a great check! Let's do that:

In [None]:
# `pst` is the same control-file object held by `pf`, not a copy.
pst = pf.pst
# noptmax = 0 is used here for the single check run described above.
pst.control_data.noptmax = 0
# Overwrite the control file in the interface directory (version 2 format).
pst.write(os.path.join(pf.new_d, "pest.pst"), version=2)

In [None]:
# Run pestpp-ies on the control file from inside the interface directory.
pyemu.os_utils.run("pestpp-ies pest.pst", cwd=pf.new_d)

In [None]:
# Load the residuals written by the check run and report the objective
# function. It is printed explicitly because a bare expression in the middle
# of a cell is not displayed.
pst.set_res(os.path.join(pf.new_d, "pest.base.rei"))
print("phi after the noptmax=0 run:", pst.phi)

# NOTE(review): the text above expects phi to be (near) zero, but this
# threshold of 1.e7 is very permissive - confirm whether a much smaller
# tolerance (e.g. 1.e-7) was intended before tightening it.
assert pst.phi < 1.e7, "unexpectedly large phi: {0}".format(pst.phi)

Now we need to "fix" the pumping well parameters - those are for later, when we do optimization:

In [None]:
par = pst.parameter_data

# Every parameter whose name contains "wel" is held fixed.
is_wel_par = par.parnme.str.contains("wel")
wellpars = par.loc[is_wel_par, :]
assert len(wellpars) > 0
# assign by parameter name, which is how parameter_data is indexed here
wel_names = wellpars.parnme
par.loc[wel_names, "partrans"] = "fixed"

Now generate a Prior parameter ensemble (which uses the parameter bounds and geostat info we passed to `PstFrom` above):

In [None]:
# Draw 1000 prior parameter realizations.
# NOTE(review): no random seed is set anywhere in this notebook, so the
# ensemble presumably differs from run to run - confirm whether that matters.
pe = pf.draw(num_reals=1000)

We need to enforce parameter bounds on those realizations, save it and add an arg to the control file to tell ies to use it:

In [None]:
# Enforce the parameter bounds on the realizations, then save the ensemble
# next to the control file.
pe.enforce()
pe.to_csv(os.path.join(pf.new_d, "prior.csv"))
# Point ies at the saved ensemble.
pst.pestpp_options["ies_par_en"] = "prior.csv"
# NOTE(review): 1000 realizations were drawn but ies_num_reals is 30 -
# presumably ies then uses only a subset of prior.csv; confirm.
pst.pestpp_options["ies_num_reals"] = 30
pst.control_data.noptmax = -1
pst.write(os.path.join(pf.new_d, "pest.pst"), version=2)

Run the first realization in the ensemble:

In [None]:
# Copy the first realization's values into the control file's parval1 column.
# This changes the in-memory `pst` (parval1 and noptmax) after pest.pst was
# written, so the result goes to a separate file, test.pst.
pst.parameter_data.loc[pe.columns, "parval1"] = pe.iloc[0, :].values
pst.control_data.noptmax = 0
pst.write(os.path.join(pf.new_d, "test.pst"), version=2)
pyemu.os_utils.run("pestpp-ies test.pst", cwd=pf.new_d)

Extra:  run a small prior monte carlo:

In [None]:
# DISABLED (optional): uncomment to run pest.pst with pestpp-ies using
# 10 workers and a master directory named "master_prior_mc".
# pyemu.os_utils.start_workers(
#     pf.new_d,
#     "pestpp-ies",
#     "pest.pst",
#     worker_root=".",
#     num_workers=10,
#     master_dir="master_prior_mc",
# )