In [None]:
import os
import shutil
import warnings
# silence all warnings (the second call targets DeprecationWarning explicitly)
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt;
import psutil

import sys
import pyemu
import flopy
# sanity check: both packages must have been imported from a path containing
# "dependencies" (presumably the copies bundled with the tutorials -- TODO confirm)
assert "dependencies" in flopy.__file__
assert "dependencies" in pyemu.__file__
# put the parent directory first on the module search path so that the
# helper module imported on the next line can be found
sys.path.insert(0,"..")
import herebedragons as hbd

# Ensemble Variance Analysis (EVA)

EVA is a method that allows assessment of the worth of data in reducing forecast uncertainty [He et al (2018)](https://doi.org/10.2118/182609-PA), much like FOSM-based methods of data worth analysis discussed in other tutorial notebooks.  However, EVA has a major advantage: it relies on ensembles instead of finite difference derivatives and is independent of the number of model parameters.  In this way, EVA has a lot in common with DSI...

Based on a multivariate Gaussian assumption between the observation data and the objective function, EVA measures the expected reduction in uncertainty using covariance information estimated from a group of simulations. The method has several innovative features compared to other approaches. 

 - Firstly, the EVA method copes well with high parameter dimensionality and nonlinearity between model inputs/parameters and model outputs, allowing it to handle nonlinear forward models and any number of parameters. Much as we discussed for model-emulator history matching with DSI, EVA can be undertaken on models of arbitrary complexity and parameterization...all you need is an ensemble of model outputs! 

 - Secondly, even if the multivariate Gaussian assumption between the historic observations and the forecast quantities of interest is violated, the EVA method still provides a lower bound on the expected uncertainty reduction, which can be useful for giving a conservative estimate of the performance of the data collection programs. 

 - Finally, EVA also gives an estimate of the shift in the mean of the forecast posterior distribution, which is crucial for calculating value-of-information (VOI). This enables exploration of what the forecast mean is expected to be, given a set of assumed observed values. Powerful stuff!


# Admin
As usual, let's get some files from previous tutorials...

In [None]:
# Temporary working folder for this notebook; remove any leftover copy first
# so that every run starts from a clean slate.
t_d = "master_ies_1a"
if os.path.exists(t_d):
    shutil.rmtree(t_d)

# Folder from a previous tutorial that provides the files used here.
org_t_d = os.path.join("..", "part2_06_ies", "master_ies_1a")
if not os.path.exists(org_t_d):
    raise Exception(f"you need to run the {org_t_d} notebook")
shutil.copytree(org_t_d, t_d)

# Getting ready

As for FOSM, we need a few ingredients for EVA:
 
- we need a pst control file with observation data
- we need a list of `predictions` obsnmes
- we need an ensemble of observation values (this can be prior or posterior...which one to use depends on what one is trying to accomplish...)
- we need an ensemble of noise (pre-prepared or obtained from the `weight` or `standard_deviation` columns in the `pst.observation_data`)

In [None]:
# Build the path to the control file inside the working folder and load it.
control_file = "freyberg_mf6.pst"
pst_name = os.path.join(t_d, control_file)
pst = pyemu.Pst(pst_name)

In [None]:
# The forecast names are stored in the control file as a single
# comma-separated "forecasts" option; split it into a list of names.
forecast_option = pst.pestpp_options["forecasts"]
predictions = forecast_option.split(",")
predictions

In [None]:
# File names of the observation ensembles, derived from the control file name.
prior_csv = pst_name.replace(".pst", ".0.obs.csv")
posterior_csv = pst_name.replace(".pst", ".3.obs.csv")

# prior observation ensemble
oe_pr = pyemu.ObservationEnsemble.from_csv(pst=pst, filename=prior_csv)
# posterior observation ensemble
oe_pt = pyemu.ObservationEnsemble.from_csv(pst=pst, filename=posterior_csv)

# compare the shapes: oe_pt may have lost a few realizations along the way
(oe_pr.shape, oe_pt.shape)

Now for the noise ensemble. 

In [None]:
# Load the obs+noise ensemble that sits next to the control file.
noise_csv = pst_name.replace(".pst", ".obs+noise.csv")
noise = pyemu.ObservationEnsemble.from_csv(pst=pst, filename=noise_csv)
noise.shape

# Evaluate value of existing obs

For starters, let's look at the data worth of observations for which we already have data. (i.e. the observations we previously used for history matching with pestpp-ies). 

In the `pyemu` world, all things EVA/DSI start with the `EnDS` Class. Spin one up now. Note that, as we want to evaluate the value of "existing" observations, which we might use for history matching, we will use the prior ensemble:


In [None]:
# EnDS object built on the prior observation ensemble and the
# pre-prepared noise ensemble.
ends = pyemu.EnDS(
    pst=pst,
    sim_ensemble=oe_pr,  # prior
    noise_ensemble=noise,
    predictions=predictions,
    verbose=False,
)

Recall that, for the Freyberg model, our observations are time-series of values recorded at monitored sites. Let's evaluate the worth of each time-series, rather than each observation independently. Create a dictionary with obs group name (i.e., time-series) as the keys, and the respective observation names.

In [None]:
# Map each observation group (one time-series per group) to the names of
# its non-zero weighted observations.
obs = pst.observation_data
nzobs = obs.loc[pst.nnz_obs_names].copy()
obslist_dict = {
    grp: nzobs.loc[nzobs.obgnme == grp].obsnme.tolist()
    for grp in nzobs.obgnme.unique()
}
obslist_dict.items()

right on! we are good to go...let's kick this off. 

First, let's assess the worth of these observations in conditioning the prior (`oe_pr`):

In [None]:
# Condition the prior ensemble on each observation group in turn.
# A copy of the dictionary is handed over so the original stays untouched.
prior_moments = ends.get_posterior_prediction_moments(
    obslist_dict=obslist_dict.copy(),
    sim_ensemble=oe_pr,  # if None, uses the sim_ensemble originally passed to ends
    include_first_moment=False,
)
mean_dfs, dfstd, dfpercen = prior_moments

`dfstd` is a dataframe of the expected standard deviation after conditioning with the respective observations.

`dfpercen` is the "percentage reduction" in uncertainty. This allows for easier comparison between predictions of different magnitudes.

Here goes a quick and dirty plot:

In [None]:
# bar chart of the percentage uncertainty reduction (return value discarded)
_ = dfpercen.plot.bar()

Each entry on the x-axis is one of the observation sites with time-series of measured data...+ the `posterior`, which shows the data worth of all observations concurrently. The y-axis shows percentage uncertainty reduction. Each bar is for one of the four predictions.  

The larger the bar, the more worth that observation group has when reducing uncertainty for the respective prediction. 

Now the same, but using the posterior observation ensemble:

In [None]:
# Same analysis as before, this time conditioning the posterior ensemble.
posterior_moments = ends.get_posterior_prediction_moments(
    obslist_dict=obslist_dict.copy(),
    sim_ensemble=oe_pt,  # if None, uses the sim_ensemble originally passed to ends
    include_first_moment=False,
)
mean_dfs, dfstd, dfpercen = posterior_moments
dfpercen.plot.bar()

Now you may be asking: "Hold up! How come we are expecting to have more uncertainty reduction??? Didn't we already history match against these obs??"

Totally right. However, here we are evaluating the worth assuming we get a fit that is *commensurate with measurement noise*! (Which pestpp-ies didn't). What this is telling us is that, if we were able to get a better fit, we could reduce uncertainty of these predictions by this much further. (Assuming we avoid inducing bias, etc etc). Based on these outcomes one might determine whether or not it is worth revisiting model construction/parameterization/history matching...

# Evaluate the worth of new data

We can do the same thing for "as of yet uncollected data". The procedure is the same.

First, a dictionary of obsname groups for time-series of data collected from potential new sites:

In [None]:
obs = pst.observation_data

# times of the non-zero weighted observations (the historic period)
historytimes = obs.loc[pst.nnz_obs_names].time.tolist()

# "hds" groups that already carry weight vs. "hds" groups that do not
is_hds = obs.oname == 'hds'
nnzobgnmes = obs.loc[is_hds & (obs.weight > 0)].obgnme.unique()
obgnmes = obs.loc[is_hds & ~obs.obgnme.isin(nnzobgnmes)].obgnme.unique()

# group by time series, keeping only observations at historic times
in_history = obs.time.isin(historytimes)
obslist_dict = {}
for grp in obgnmes:
    obslist_dict[grp] = obs.loc[(obs.obgnme == grp) & in_history].obsnme.tolist()



NOTE: because these observations were not previously assigned a weight when generating the obs+noise ensemble, we need to make sure that we assign appropriate weight and/or standard_deviation values in the Pst control file:

In [None]:
# Give every candidate observation a standard deviation of 0.1 and the
# matching weight (its reciprocal), one group at a time.
noise_std = 0.1
for names in obslist_dict.values():
    obs.loc[names, "weight"] = 1 / noise_std
    obs.loc[names, "standard_deviation"] = noise_std

here we go...

In [None]:
# EnDS object built on the posterior ensemble.
# NOTE: no noise_ensemble is passed here (noise_ensemble=noise is left out);
# it will now be generated from the observation_data.
ends = pyemu.EnDS(
    pst=pst,
    sim_ensemble=oe_pt,  # for the posterior
    predictions=predictions,
    verbose=False,
)

new_data_moments = ends.get_posterior_prediction_moments(
    obslist_dict=obslist_dict.copy(),
    sim_ensemble=oe_pt,
    include_first_moment=False,
)
mean_dfs, dfstd, dfpercen = new_data_moments
dfpercen.plot.bar()

...and there we have it: the % uncertainty reduction for each forecast for each of the potential new observation points...as well as the total expected uncertainty reduction if all observations are included as non-zero weight observations. (...assuming a fit commensurate to noise yadda yadda...)

# Evaluate whether uncertainty has converged

Another useful application is to test whether uncertainty has converged...i.e. do we have enough realizations?

We can accomplish this by repeatedly running `ends.get_posterior_prediction_moments()`, with less than all the possible realizations, and checking if the uncertainty estimates change.

`ends.get_posterior_prediction_convergence_summary()` automates this process. All we need to do is pass a sequence of "number of realizations" to test (`num_realization_sequence`), as well as a list of how many randomly selected realizations (`num_replicate_sequence`) to replicate each of the sequences.

In [None]:
# Ensemble sizes to test: 20 evenly spaced values from 10 up to the number
# of realizations in the posterior ensemble, truncated to integers.
n_posterior_reals = oe_pt.shape[0]
num_reals_seq = list(map(int, np.linspace(10, n_posterior_reals, 20)))
num_reals_seq

first we will test with 10 reals, then 54, then 98 and so on....

In [None]:
# number of randomly selected replicates to draw for each of the elements
# in the `num_reals_seq` list (the same count for every element)
num_reps_seq = [20] * len(num_reals_seq)
num_reps_seq

first, we will randomly select 10 reals and test them, then repeat until we have 20 replicates.

then, we will randomly select 54 reals and test them, again with 20 replicates.

and so on...


...right, here we go. This may take a few minutes...

In [None]:
# Rebuild the {obs group: [obs names]} dictionary for the observations that
# currently carry non-zero weight in the control file.
obs = pst.observation_data
nzobs = obs.loc[pst.nnz_obs_names].copy()
obslist_dict = {
    grp: nzobs.loc[nzobs.obgnme == grp].obsnme.tolist()
    for grp in nzobs.obgnme.unique()
}

# EnDS object on the posterior ensemble, with the pre-prepared noise ensemble.
ends = pyemu.EnDS(
    pst=pst,
    sim_ensemble=oe_pt,
    noise_ensemble=noise,
    predictions=predictions,
    verbose=False,
)

# An empty list is passed as `obslist_dict`, so no per-group entries are
# requested; the plotting cell that follows reads only the "posterior" row.
# NOTE(review): to also track convergence per observation group, pass
# `obslist_dict.copy()` here instead -- confirm against the pyemu docs.
means = ends.get_posterior_prediction_convergence_summary(
    num_realization_sequence=num_reals_seq,
    num_replicate_sequence=num_reps_seq,
    obslist_dict=[],
)

OK, now to plot those results up...

In [None]:
# One panel per prediction: "posterior" value from each entry of `means`
# plotted against the corresponding number of realizations.
fig, axs = plt.subplots(2, 2, figsize=(8, 8), sharex=True)
axs = axs.flatten()
for idx, pred in enumerate(predictions):
    ax = axs[idx]
    ax.set_title(pred.split("usecol:")[-1])
    ax.set_ylabel("mean predictive standard deviation")
    ax.set_xlabel("num reals")

    # keys of `means` are used as the x values
    for nreals, summary in means.items():
        ax.scatter(nreals, summary.loc["posterior", pred], color='b', s=19)
    ax.grid(lw=1)
fig.tight_layout()

Sweet, there we have it. The uncertainty convergence for each prediction versus number of reals. Where the plots plateau indicates uncertainty has converged. For most of the predictions, approximately 200 reals should be sufficient. As usual, `part_time` is a bit more challenging. 