# Prepare for sequential data assimilation

### 1. The modified Freyberg PEST dataset

The modified Freyberg model is introduced in another tutorial notebook (see "freyberg intro to model"). The current notebook picks up following the "freyberg pstfrom pest setup" notebook, in which a high-dimensional PEST dataset was constructed using `pyemu.PstFrom`. You may also wish to go through the "intro to pyemu" notebook beforehand.

The next couple of cells load necessary dependencies and call a convenience function to prepare the PEST dataset folder for you. This is the same dataset that was constructed during the "freyberg pstfrom pest setup" tutorial. Simply press `shift+enter` to run the cells.

In [None]:
import os
import shutil
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 
import pyemu
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt;
import flopy

import sys
sys.path.append("..")
# import pre-prepared convenience functions
import herebedragons as hbd

In [None]:
# TODO: if we move the obsvals and weights process to a separate notebook,
# we need to also check that it has been executed. We could set an
# "observed" column in the obs data to indicate that we have set obs vals and weights?

# temporary working folder that will hold the sequential (DA) dataset
t_d = os.path.join("freyberg6_da_template")

# template folder produced by the PstFrom setup notebook; it must already exist
org_t_d = os.path.join("..", "part2_pstfrom_pest_setup", "freyberg6_template")
source_missing = not os.path.exists(org_t_d)
if source_missing:
    raise Exception("you need to run the '/part2_pstfrom_pest_setup/freyberg_pstfrom_pest_setup.ipynb' notebook")

# always start from a fresh copy: remove any previous working folder first
if os.path.exists(t_d):
    shutil.rmtree(t_d)
shutil.copytree(org_t_d, t_d)
                       


There are several modifications we need to make to both the model and the PEST interface in order to go from batch estimation to sequential estimation.  First, we need to make the model a single-stress-period model - PESTPP-DA will take control of the advancement of simulation time...

In [None]:
# Overwrite the model's TDIS file so that it describes exactly one stress
# period (NPER 1). The perioddata record is "1.0  1 1.0"
# (perlen, nstp, tsmult in the MF6 TDIS input format).
# NOTE(review): the header string also writes a line containing only "]" to
# the file - looks like a typo; confirm MF6 ignores it before removing.
tdis_chunks = [
    "# new tdis written hastily at {0}\n]\n".format(datetime.now()),
    "BEGIN options\n  TIME_UNITS days\nEND options\n\n",
    "BEGIN dimensions\n  NPER 1\nEND dimensions\n\n",
    "BEGIN perioddata\n  1.0  1 1.0\nEND perioddata\n\n",
]
with open(os.path.join(t_d, "freyberg6.tdis"), 'w') as f:
    f.writelines(tdis_chunks)

          

Now, just make sure we haven't done something dumb (er than usual):

In [None]:
# sanity check: run the "mf6" command inside the working folder with the
# rewritten single-period TDIS file
pyemu.os_utils.run("mf6",cwd=t_d)

# Now for the hard part

First, let's assign cycle numbers to the time-varying parameters and their template files

In [None]:
# load the existing (batch) PEST control file from the working folder
pst = pyemu.Pst(os.path.join(t_d,"freyberg_mf6.pst"))


In [None]:
# model-input table of the control file; its "pest_file" column (the template
# file names) is what the following cells parse to work out cycle numbers
df = pst.model_input_data
df

In [None]:
# default every template file to cycle -1; time-varying files are given
# specific cycle numbers in the cells below
# NOTE(review): -1 presumably means "applies to every cycle" in PESTPP-DA - confirm
df.loc[:,"cycle"] = -1

In [None]:
# Select the template files whose name contains "sfr" but not "cond".
is_sfr_file = df.pest_file.apply(lambda name: "sfr" in name and "cond" not in name)
sfrdf = df.loc[is_sfr_file, :]
# "inst" is the integer that follows the text "inst" in the file name
# (up to the next underscore)
sfrdf.loc[:, "inst"] = [
    int(name.split("inst")[1].split("_")[0]) for name in sfrdf.pest_file
]
sfrdf.head()

In [None]:
# sfrdf is a separate selection, so the parsed instance numbers are written
# back into the model-input table itself, where they become the cycle numbers
df.loc[sfrdf.index,"cycle"] = sfrdf.inst.values
# display the updated rows
df.loc[sfrdf.index,:]

In [None]:
# Template files whose name contains "wel": the integer that follows "inst"
# in the file name (up to the next underscore) is used as the cycle number.
weldf = df.loc[df.pest_file.str.contains('wel'),:].copy()
weldf.loc[:,"cycle"] = weldf.pest_file.apply(lambda x: int(x.split("inst")[1].split("_")[0]))
# weldf is a copy of the selected rows, so setting "cycle" on it alone never
# reaches the control file. Write the values back to the model-input table
# (as is done for the SFR files); otherwise these rows keep the default -1.
df.loc[weldf.index,"cycle"] = weldf.cycle.values
weldf.head()

In [None]:
# Template files whose name contains both "rch" and "tcn": the integer that
# ends the underscore-separated token just before "tcn" is parsed and reduced
# by one to give the cycle number.
rchdf = df.loc[df.pest_file.apply(lambda x: "rch" in x and "tcn" in x),:].copy()
rchdf.loc[:,"cycle"] = rchdf.pest_file.apply(lambda x: int(x.split("tcn")[0].split("_")[-1])-1)
# rchdf is a copy of the selected rows, so setting "cycle" on it alone never
# reaches the control file. Write the values back to the model-input table
# (as is done for the SFR files); otherwise these rows keep the default -1.
df.loc[rchdf.index,"cycle"] = rchdf.cycle.values
rchdf.head()

In [None]:
# parameter table of the control file; default every parameter to cycle -1,
# time-varying parameters are given specific cycle numbers in the cells below
par = pst.parameter_data
par.loc[:,"cycle"] = -1

In [None]:
# Parameters whose name contains "wel": their "inst" value, cast to int, is
# the cycle number.
wpar = par.loc[par.parnme.str.contains("wel"),:].copy()
wpar.loc[:,"cycle"] = wpar.inst.astype(int)
# wpar is a copy of the selected rows; write the cycles back to the parameter
# table itself, otherwise these parameters keep the default cycle of -1
par.loc[wpar.index,"cycle"] = wpar.cycle.values

In [None]:
# Parameters whose name contains "sfr" but not "cond": their "inst" value,
# cast to int, is the cycle number.
spar = par.loc[par.parnme.apply(lambda x: "sfr" in x and "cond" not in x),:].copy()
spar.loc[:,"cycle"] = spar.inst.astype(int)
# spar is a copy of the selected rows; write the cycles back to the parameter
# table itself, otherwise these parameters keep the default cycle of -1
par.loc[spar.index,"cycle"] = spar.cycle.values
spar.head()

In [None]:
# Parameters whose name contains both "rch" and "tcn": the integer that ends
# the underscore-separated token just before "tcn" is parsed and reduced by
# one to give the cycle number.
rpar = par.loc[par.parnme.apply(lambda x: "rch" in x and "tcn" in x),:].copy()
rpar.loc[:,"cycle"] = rpar.parnme.apply(lambda x: int(x.split("tcn")[0].split("_")[-1])-1)
# rpar is a copy of the selected rows; write the cycles back to the parameter
# table itself, otherwise these parameters keep the default cycle of -1
par.loc[rpar.index,"cycle"] = rpar.cycle.values
rpar

Now we need to add a special parameter that will be used to control the length of the stress period that the single-stress-period model will simulate.  As usual, we do this with a template file:

In [None]:
# Write a PEST template file for the single-period TDIS file. The first line
# ("ptf ~") declares "~" as the parameter delimiter, and the period length in
# the perioddata block is replaced by the "~  perlen  ~" parameter marker.
# NOTE(review): as in the plain TDIS file, the header string also writes a
# line containing only "]" - looks like a typo; confirm before removing.
tpl_chunks = [
    "ptf ~\n",
    "# new tdis written hastily at {0}\n]\n".format(datetime.now()),
    "BEGIN options\n  TIME_UNITS days\nEND options\n\n",
    "BEGIN dimensions\n  NPER 1\nEND dimensions\n\n",
    "BEGIN perioddata\n  ~  perlen  ~  1 1.0\nEND perioddata\n\n",
]
with open(os.path.join(t_d, "freyberg6.tdis.tpl"), 'w') as f:
    for chunk in tpl_chunks:
        f.write(chunk)

In [None]:
# register the new template file (and the "perlen" parameter it contains)
# with the control file
pst.add_parameters(os.path.join(t_d,"freyberg6.tdis.tpl"),pst_path=".")

In [None]:
# "perlen" is marked as fixed so it is not treated as an adjustable parameter
pst.parameter_data.loc["perlen","partrans"] = "fixed"

Since `perlen` needs to change over cycles, we can use a parameter cycle table rather than making a duplicate template file and `perlen` parameter for each cycle:

In [None]:
# Load the simulation from the original template folder (only the "dis"
# package is requested) and pull the stress-period lengths out of the
# TDIS perioddata.
sim = flopy.mf6.MFSimulation.load(sim_ws=org_t_d, load_only=["dis"])
perioddata = sim.tdis.perioddata.array
org_perlen = perioddata["perlen"]
org_perlen

In [None]:
# One row per original stress period: the index is 0..N-1 and the single
# column "perlen" holds the original period lengths.
# NOTE: this rebinds the notebook-level name `df`.
cycle_index = np.arange(len(org_perlen))
df = pd.DataFrame({"perlen": org_perlen}, index=cycle_index)
df

In [None]:
# Save the table into the working folder and point PESTPP-DA at it through
# the "da_parameter_cycle_table" control-file option.
# NOTE(review): the CSV is written with one row per cycle and "perlen" as the
# column - confirm this is the orientation PESTPP-DA expects (it may need
# parameters as rows and cycles as columns, i.e. the transpose).
par_cycle_csv = os.path.join(t_d, "par_cycle_table.csv")
df.to_csv(par_cycle_csv)
pst.pestpp_options["da_parameter_cycle_table"] = "par_cycle_table.csv"

Now for the observation data - yuck!

In [None]:
# observation table of the control file (displayed for inspection)
obs = pst.observation_data
obs

In [None]:
# model-output table of the control file; its "pest_file" column holds the
# instruction file names handled in the following cells
pst.model_output_data

In [None]:
# remove the observations defined by this instruction file from the control file
# NOTE(review): presumably these outputs are not used in the sequential setup - confirm
pst.drop_observations(os.path.join(t_d,"freyberg_mp.mpend.ins"),pst_path=".")

In [None]:
# remove the observations defined by this instruction file from the control file
# NOTE(review): presumably these outputs are not used in the sequential setup - confirm
pst.drop_observations(os.path.join(t_d,"sfr.tdiff.csv.ins"),pst_path=".")

In [None]:
# remove the observations defined by this instruction file from the control file
# NOTE(review): presumably these outputs are not used in the sequential setup - confirm
pst.drop_observations(os.path.join(t_d,"heads.tdiff.csv.ins"),pst_path=".")

In [None]:
# drop the heads.csv observations but keep the returned table of dropped
# observations (hdf) - its usecol/time columns are used further down
hdf = pst.drop_observations(os.path.join(t_d,"heads.csv.ins"),pst_path=".")
# sfr.csv.ins is deliberately not dropped here; it is handled (and its dropped
# observations captured as sdf) in the instruction-file loop below
#sdf = pst.drop_observations(os.path.join(t_d,"sfr.csv.ins"),pst_path=".")

In [None]:
# display the instruction files that are still registered after the drops above
pst.model_output_data

In [None]:
# Rewrite every remaining instruction file (except those whose name starts
# with "hdslay") so that it keeps only its first three lines, with the
# "_totim:3652.5" / "_time:3652.5" suffixes stripped from the text, and
# re-register the shortened file with the control file.
# NOTE(review): the first three lines are presumably the ins header plus a
# single output row, and 3652.5 presumably the first output time - confirm.
# The observations dropped for "sfr.csv.ins" are kept in sdf.
sdf = None
# iterate over a snapshot of the names: the drop/add calls below modify
# pst.model_output_data while we loop
for ins_file in list(pst.model_output_data.pest_file):
    if ins_file.startswith("hdslay"):
        continue
    # read the original instructions, closing the file before it is rewritten
    with open(os.path.join(t_d,ins_file),'r') as f:
        lines = f.readlines()
    df = pst.drop_observations(os.path.join(t_d,ins_file),pst_path=".")
    if ins_file == "sfr.csv.ins":
        sdf = df
    with open(os.path.join(t_d,ins_file),'w') as f:
        for line in lines[:3]:
            f.write(line.replace("_totim:3652.5","").replace("_time:3652.5",""))
    pst.add_observations(os.path.join(t_d,ins_file),pst_path=".")
assert sdf is not None, "'sfr.csv.ins' was not found in pst.model_output_data"

In [None]:
# Split each "usecol" value on "-" and store the integers found at positions
# 1, 2 and 3 as the columns "k", "i" and "j"
# (presumably layer / row / column indices - confirm).
for token_pos, col_name in enumerate(("k", "i", "j"), start=1):
    hdf.loc[:, col_name] = hdf.usecol.apply(
        lambda x, token_pos=token_pos: int(x.split("-")[token_pos])
    )
# make sure the time column is numeric
hdf.loc[:, "time"] = hdf.time.astype(float)