# A very simple demo to explain how the variogram affects stochastic realizations

In this simple notebook, we exploit the speed of `PyPestUtils` to help build understanding of how the variogram parameters affect the resulting stochastic fields using interactive plotting #winning

In [None]:
import sys

import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact

# Put the parent directory on the import path before importing pypestutils --
# presumably so the in-repo package is found without installing it
# (verify against the repository layout).
sys.path.append("..")
from pypestutils.pestutilslib import PestUtilsLib

# Single PestUtilsLib handle shared by the plotting functions below.
# NOTE(review): logger_level=0 presumably selects the library's logging
# verbosity -- confirm the meaning of 0 in the PestUtilsLib documentation.
lib = PestUtilsLib(logger_level=0)

Just some definitions. We will use a 100 x 100 grid of nodes with a delx/dely (and a cell area) of 1.

In [None]:
# Grid size: 100 rows by 100 columns of nodes.
nrow = ncol = 100

# 1-based node coordinates on a unit spacing: x increases along the columns,
# y increases along the rows. Both arrays have shape (nrow, ncol).
x, y = np.meshgrid(
    np.arange(1, ncol + 1, dtype=float),
    np.arange(1, nrow + 1, dtype=float),
)

# Flattened (row-major) coordinate vectors handed to fieldgen2d_sva.
ec = x.flatten()
nc = y.flatten()

# Scalar per-node inputs to fieldgen2d_sva.
area = 1
active = 1

# Statistics of the simulated property.
mean = 1  # log10 HK mean
var = 0.1  # log10 HK variance

# Simulation options.
transtype = 0  # not log transformed
# Variogram type code, labelled "exponential" in the original notes.
# NOTE(review): PEST-style codes are usually 1 = spherical, 2 = exponential --
# confirm against the fieldgen2d_sva documentation.
vtype = 1
power = 0
nreal = 1  # number of realizations generated per call

This is the function that will generate one realization and plot it for us, given a correlation length (`corrlen`), an anisotropy ratio (`anisotropy`), and a `bearing`. A lot of gross plotting code here - feel free to ignore it...

In [None]:
def plot_real(corrlen, anisotropy, bearing):
    """Generate and display one realization for the given variogram settings.

    Parameters
    ----------
    corrlen
        Correlation length forwarded to ``lib.fieldgen2d_sva``.
    anisotropy
        Anisotropy ratio forwarded to ``lib.fieldgen2d_sva``.
    bearing
        Bearing forwarded to ``lib.fieldgen2d_sva``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``; nothing is returned.

    The grid coordinates, mean, variance and remaining options are read from
    the module-level definitions.
    """
    _, ax = plt.subplots(figsize=(7, 6))

    # Re-seed on every call so that moving a slider changes only the
    # variogram, not the underlying random numbers.
    lib.initialize_randgen(1123455)

    realizations = lib.fieldgen2d_sva(
        ec,
        nc,
        area,
        int(active),
        mean,
        var,
        corrlen,
        anisotropy,
        bearing,
        transtype,
        vtype,
        power,
        nreal,
    )

    # Take column 0 of the result (nreal is 1) and fold it back onto the grid.
    hk_grid = realizations[:, 0].reshape((nrow, ncol))

    # Fixed colour limits keep the colours comparable between slider positions.
    ax.imshow(hk_grid, vmin=0.0, vmax=2, cmap="magma")
    ax.set(xlabel="column", ylabel="row")
    ax.set_title("a random realization of HK", loc="left")
    plt.show()

Move these slowly so it doesn't flicker...

 - corrlen = the correlation length of the variogram
 - anisotropy = the anisotropy ratio of the primary to the secondary axis of the anisotropy ellipse
 - bearing = the angle from north of the primary axis of the anisotropy ellipse

In [None]:
# One slider per variogram quantity; each tuple is (min, max, step).
_ = interact(
    plot_real,
    corrlen=(1, 50, 1),
    anisotropy=(1, 10, 0.5),
    bearing=(0, 90, 1),
)

Test your understanding:
- What happens when the correlation length goes to 1.0 (the grid spacing distance)?  This is why we need to use geostatistics when generating realizations of spatially distributed parameterizations (like grid-scale and pilot point parameters)
- What do you think will happen if the correlation length goes to infinity?

# Uncertainty in the uncertainty

In practice, how often do we "know" the variogram for each property we want to estimate?  Or is a variogram even appropriate to describe the spatial patterns and characteristics of the property?!  These are difficult questions to grapple with in applied groundwater modeling.  However, if we choose to stay with the variogram concept, but acknowledge that the concept may itself be subject to some assumptions and, therefore, uncertainty, this leads to the idea that the variogram quantities (i.e. correlation length, anisotropy, and bearing) themselves are also "uncertain" and should perhaps be accounted for in the inverse/inference problem (by parameterizing them)...and that the uncertainty in those variogram quantities might also be spatially varying.  WAT?!  This is getting deep...these types of situations lead to a hierarchical structure in the inverse/inference problem, and the parameters that inform the variogram quantities are typically called "hyperparameters".  Whew!

`PyPestUtils` implements a form of hierarchical geostatistical simulation (also referred to as nonstationary geostatistics). To help build some understanding, let's explore what happens when we introduce hyperparameters on the property variogram.  In essence, we are now going to treat the bearing, anisotropy, and correlation length of the above variogram as themselves being described by variograms...It's like geostatistical inception...

In [None]:
# Grid size for the hierarchical demo: 80 rows by 50 columns of nodes.
nrow, ncol = 80, 50

# 1-based node coordinates on a unit spacing: x increases along the columns,
# y increases along the rows. Both arrays have shape (nrow, ncol).
x, y = np.meshgrid(
    np.arange(1, ncol + 1, dtype=float),
    np.arange(1, nrow + 1, dtype=float),
)

# Flattened (row-major) coordinate vectors handed to fieldgen2d_sva.
ec = x.flatten()
nc = y.flatten()

# Scalar per-node inputs to fieldgen2d_sva.
area = 1
active = 1

# Statistics of the simulated property.
mean = 1  # log10 HK mean
var = 0.1  # log10 HK variance

# Simulation options.
transtype = 0  # not log transformed
# Variogram type code, labelled "exponential" in the original notes.
# NOTE(review): PEST-style codes are usually 1 = spherical, 2 = exponential --
# confirm against the fieldgen2d_sva documentation.
vtype = 1
power = 0
nreal = 1  # number of realizations generated per call

A more complex plotting function that treats the property variogram input quantities as geostatistical quantities, each with their own variogram

In [None]:
def plot_real(
    b_mean,
    b_var,
    b_corrlen,
    b_anisotropy,
    b_bearing,
    a_mean,
    a_var,
    a_corrlen,
    a_anisotropy,
    a_bearing,
    c_mean,
    c_var,
    c_corrlen,
    c_anisotropy,
    c_bearing,
):
    """Generate and display one hierarchical (nonstationary) HK realization.

    Three hyperparameter fields -- bearing (``b_``), anisotropy (``a_``) and
    correlation length (``c_``) -- are each simulated from their own
    mean / variance / variogram settings. Those fields are then passed as the
    spatially varying bearing, anisotropy and correlation length of the HK
    simulation. All four fields are drawn side by side.

    Parameters
    ----------
    b_mean, b_var, b_corrlen, b_anisotropy, b_bearing
        Mean, variance, correlation length, anisotropy and bearing used to
        simulate the bearing field.
    a_mean, a_var, a_corrlen, a_anisotropy, a_bearing
        The same five quantities for the anisotropy field.
    c_mean, c_var, c_corrlen, c_anisotropy, c_bearing
        The same five quantities for the correlation-length field.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``; nothing is returned.

    The grid coordinates, HK mean/variance and remaining options are read
    from the module-level definitions.
    """
    _, axes = plt.subplots(1, 4, figsize=(10, 3))

    # Reset the random seed so that the underlying pseudo random numbers
    # don't change from one slider position to the next.
    lib.initialize_randgen(1123455)

    def simulate(f_mean, f_var, f_corrlen, f_anisotropy, f_bearing):
        """Return column 0 of one fieldgen2d_sva call on the module grid."""
        return lib.fieldgen2d_sva(
            ec,
            nc,
            area,
            int(active),
            f_mean,
            f_var,
            f_corrlen,
            f_anisotropy,
            f_bearing,
            transtype,
            vtype,
            power,
            nreal,
        )[:, 0]

    def show(ax, values, title, **imshow_kwargs):
        """Draw a flattened field on ``ax`` with a title, colorbar, no ticks."""
        image = ax.imshow(
            values.reshape((nrow, ncol)), cmap="magma", **imshow_kwargs
        )
        ax.set_title(title)
        plt.colorbar(image, ax=ax)
        ax.set_xticks([])
        ax.set_yticks([])

    # The generator is seeded once above, so the order of the simulate()
    # calls (bearing, anisotropy, correlation length, HK) is kept fixed --
    # presumably each call advances the same random stream.
    bearing_field = simulate(b_mean, b_var, b_corrlen, b_anisotropy, b_bearing)
    show(axes[0], bearing_field, "bearing")

    aniso_field = simulate(a_mean, a_var, a_corrlen, a_anisotropy, a_bearing)
    show(axes[1], aniso_field, "aniso")

    corrlen_field = simulate(c_mean, c_var, c_corrlen, c_anisotropy, c_bearing)
    show(axes[2], corrlen_field, "corrlen")

    # HK uses the three hyperparameter fields in place of scalar variogram
    # quantities; fixed colour limits keep slider positions comparable.
    hk_field = simulate(mean, var, corrlen_field, aniso_field, bearing_field)
    show(axes[3], hk_field, "HK", vmin=0.0, vmax=2)

    plt.show()

The 'b_' prefix is for the bearing variogram, 'a_' is for the anisotropy variogram, and, you guessed it, 'c_' is for the correlation length variogram...This is complex stuff, but we found that these sliders really helped us start to build some intuition regarding how these hyperparameters interact....

In [None]:
# Slider ranges as (min, max, step), grouped by the hyperparameter field they
# control. The dict order is the order in which the keywords reach interact.
_hyper_slider_ranges = {
    # bearing field
    "b_mean": (0, 90, 1),
    "b_var": (0.1, 50, 1),
    "b_corrlen": (1, 100, 10),
    "b_anisotropy": (1, 10, 0.5),
    "b_bearing": (0, 90, 1),
    # anisotropy field
    "a_mean": (1, 10, 1),
    "a_var": (0.1, 10.0, 0.1),
    "a_corrlen": (10, 500, 10),
    "a_anisotropy": (1, 10, 0.5),
    "a_bearing": (0, 90, 1),
    # correlation-length field
    "c_mean": (1, 50, 1),
    "c_var": (0.1, 50.0, 0.1),
    "c_corrlen": (1, 1000, 10),
    "c_anisotropy": (1, 50, 1),
    "c_bearing": (0, 90, 1),
}
_ = interact(plot_real, **_hyper_slider_ranges)

Start by playing with the "_mean" sliders first - these will have the most influence over the resulting `HK` array.

As you move these sliders, pay attention to how the resulting `HK` array changes - it should become apparent that the relation between the hyperparameters and the resulting `HK` array is highly nonlinear (and this is before even "running the model" and trying to estimate property values and posterior uncertainties).  However, you can generate more geologically "pleasing" patterns with these hyperparameters.

Using these sliders, how could you get back to the simple stationary, 2-point geostatistical simulation we explored above?  Well, to do this, we need to remove the influence of the spatially varying hyperparameters.  So, if we set the correlation lengths ("_corrlen") to large values and the anisotropy ("_anisotropy") sliders to small values, we essentially reduce this back to a standard 2-point geostatistical simulation.  Try this and then play with the "_mean" sliders - the relation between the "_mean" sliders and the resulting `HK` arrays now behaves almost exactly like the simple slider widget in the previous example.