# Marginal Value Theorem
A place to test experimental parameters for foraging behavior in line with the marginal value theorem.

## Initial setup

### Imports

In [None]:
# Numerical tools
import numpy as np
from scipy.optimize import broyden1

# Plotting tools
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
%matplotlib widget
import matplotlib.pyplot as plt

# General tools
import copy
import warnings
# Silence DeprecationWarning by category. The second positional argument of
# filterwarnings is a regex matched against the warning *message*, so the
# category has to be passed by keyword.
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Custom modules
import sys
sys.path.insert(0, '../python')
import helper

### Selection widget

In [None]:
class SelectionSlider(widgets.SelectionSlider):
    """Selection slider whose interact value can be transformed and indexed.

    Parameters
    ----------
    return_index : bool
        If True, ``get_interact_value`` returns ``(transformed value, index)``;
        otherwise it returns only the transformed value.
    transform : callable or None
        Applied to the selected value before it is returned. ``None`` means
        the value is passed through unchanged.

    All other arguments are forwarded to ``widgets.SelectionSlider``.
    """

    def __init__(self, *args, return_index=True, transform=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.return_index = return_index
        # Fall back to the identity so get_interact_value can always call it
        self.transform = transform if transform is not None else (lambda x: x)

    def get_interact_value(self):
        """Return the value handed to the interact callback."""
        selected = self.transform(self.value)
        if not self.return_index:
            return selected
        return (selected, self.index)

In [None]:
# Keyword arguments shared by the SelectionSlider widgets created below
default_kwargs = {
    'disabled': False,
    'continuous_update': False,
    'orientation': 'horizontal',
    'readout': True,
    'return_index': True,
    'transform': None,
}

## Theory

### Optimal residence time
Let's define a patchy environment as consisting of the following parameters:

$\quad T_{p}^{(i)}$: time spent harvesting reward in patch $i$  
$\quad T_{t}^{(i)}$: time spent traveling to patch $i$  
$\quad R^{(i)}(t^{(i)})$: amount of reward harvested in patch $i$ after time $t^{(i)}$ (i.e. the *gain function*)  
$\quad s$: search cost per unit time

The average reward intake $\bar{E}$ across the environment is:

$
\begin{align}
\quad \bar{E} 
&= \frac{\text{total energy}}{\text{total time}} \\
&= \frac{\sum_{i} \left ( R^{(i)}(T_{p}^{(i)}) - sT_{t}^{(i)} \right )}{\sum_{i} \left ( T_{t}^{(i)} + T_{p}^{(i)} \right )} \\
&= \frac{R^{(i)}(T_{p}^{(i)}) - sT_{t}^{(i)} + k^{(i)}}{T_{t}^{(i)} + T_{p}^{(i)} + c^{(i)}}
\end{align}
$

where $k^{(i)} = \sum_{j \neq i} \left ( R^{(j)}(T_{p}^{(j)}) - sT_{t}^{(j)} \right )$ and $c^{(i)} = \sum_{j \neq i} \left ( T_{t}^{(j)} + T_{p}^{(j)} \right )$. Given that $\mathbf{T}_p = \{T_{p}^{(i)}\}$ are the only behavioral parameters in this model, we can define an optimal behavior vector as $\mathbf{T}_p^* = \{T_{p}^{*(i)}\}$. To solve for optimal behavior, we differentiate $\bar{E}$ with respect to $\mathbf{T}_p$:

$
\begin{align}
\quad \dfrac{\partial \bar{E}}{\partial \mathbf{T}_p} 
&= \left \{ \begin{matrix} ... & \dfrac{\partial \bar{E}}{\partial T_p^{(i)}} & ... \end{matrix} \right \} \\
&= \left \{ \begin{matrix} ... & \dfrac{\partial}{\partial T_p^{(i)}} \left ( \dfrac{R^{(i)}(T_{p}^{(i)}) - sT_{t}^{(i)} + k^{(i)}}{T_{t}^{(i)} + T_{p}^{(i)} + c^{(i)}} \right ) & ... \end{matrix} \right \} \\
&= \left \{ \begin{matrix} ... & \dfrac{r^{(i)}(T_{p}^{(i)}) \left ( T_{t}^{(i)} + T_{p}^{(i)} + c^{(i)} \right ) - \left ( R^{(i)}(T_{p}^{(i)}) - sT_{t}^{(i)} + k^{(i)} \right )}{\left ( T_{t}^{(i)} + T_{p}^{(i)} + c^{(i)} \right )^2} & ... \end{matrix} \right \}
\end{align}$


and note that $\bar{E}$ is maximized when $\frac{\partial \bar{E}}{\partial \mathbf{T}_p} = \mathbf{0}$:

$
\begin{align}
\quad \mathbf{0} &= \left \{ \begin{matrix} ... & \dfrac{r^{(i)}(T_{p}^{*(i)}) \left ( T_{t}^{(i)} + T_{p}^{*(i)} + c^{(i)} \right ) - \left ( R^{(i)}(T_{p}^{*(i)}) - sT_{t}^{(i)} + k^{(i)} \right )}{\left ( T_{t}^{(i)} + T_{p}^{*(i)} + c^{(i)} \right )^2} & ... \end{matrix} \right \} \\
&= \left \{ \begin{matrix} ... & r^{(i)}(T_{p}^{*(i)}) \left ( T_{t}^{(i)} + T_{p}^{*(i)} + c^{(i)} \right ) - \left ( R^{(i)}(T_{p}^{*(i)}) - sT_{t}^{(i)} + k^{(i)} \right ) & ... \end{matrix} \right \} \\
\end{align} \\
\quad \Rightarrow r^{(i)}(T_{p}^{*(i)}) \left ( T_{t}^{(i)} + T_{p}^{*(i)} + c^{(i)} \right ) = R^{(i)}(T_{p}^{*(i)}) - sT_{t}^{(i)} + k^{(i)} \\
\quad \Rightarrow r^{(i)}(T_{p}^{*(i)}) = \dfrac{R^{(i)}(T_{p}^{*(i)}) - sT_{t}^{(i)} + k^{(i)}}{T_{t}^{(i)} + T_{p}^{*(i)} + c^{(i)}} = \bar{E}
$

or in the single-patch case ($i=1$):

$
\quad r(T_{p}^{*}) = \dfrac{R(T_{p}^{*}) - sT_{t}}{T_{t} + T_{p}^{*}} = \bar{E}
$

where $r(t) = \frac{\mathrm{d} R}{\mathrm{d} t}$.

Therefore, the optimal patch residence time for each patch $T_{p}^{*(i)}$ occurs when the marginal gain in that patch equals the average rate of return across the environment; this is called the **marginal value theorem** (MVT). If we model the gain function based on an exponentially decaying rate of return within a given patch:

$
\quad r(T_{p}) = r_0 e^{-\frac{T_p}{\tau}} \\
\quad R(T_{p}) = R_0 + \int_{0}^{T_p} r(t)dt = r_0 \tau \left ( 1 - e^{-\frac{T_p}{\tau}} \right ) + R_0
$

then, for the single-patch case, the MVT equation becomes:

$
\quad r(T_{p}^{*}) = \dfrac{R(T_{p}^{*}) - sT_{t}}{T_{t} + T_{p}^{*}} \\
\quad r_0 e^{-\frac{T_p^{*}}{\tau}} = \dfrac{r_0 \tau \left ( 1 - e^{-\frac{T_p^{*}}{\tau}} \right ) + R_0 - sT_{t}}{T_{t} + T_{p}^{*}} \\
\quad \Rightarrow r_0 e^{-\frac{T_p^{*}}{\tau}} \left ( T_{t} + T_{p}^{*} \right ) = r_0 \tau - r_0 \tau  e^{-\frac{T_p^{*}}{\tau}} + R_0 - sT_{t} \\
\quad \Rightarrow r_0 e^{-\frac{T_p^{*}}{\tau}} \left ( T_{t} + T_{p}^{*} + \tau \right ) - r_0 \tau - R_0 + sT_{t} = 0
$

As a sanity check, let's solve for the optimal residence time a slightly different way. If we are trying to maximize our average harvest rate across an environment, $\bar{R}(T_p)$, then we can simply set the derivative of this intake rate to zero and solve to find the maximum. First, the derivative is:

$
\quad \bar{R}(T_p) = \dfrac{R(T_{p}) - sT_{t}}{T_{t} + T_{p}} \\
\begin{align}
\quad \dfrac{d \bar{R}}{d T_p} 
&= \dfrac{\dfrac{d}{d T_p} \left ( R(T_p) - sT_t \right ) \left ( T_t + T_p \right ) - \left ( R(T_p) - sT_t \right ) \left ( \dfrac{d}{d T_p} \left ( T_t + T_p \right ) \right )}{\left ( T_t + T_p \right )^2} \quad \text{(by the quotient rule)} \\
&= \dfrac{\left ( \dfrac{d R}{d T_p} \right ) \left ( T_t + T_p \right ) - R(T_p) + sT_t}{\left ( T_t + T_p \right )^2} \\
&= \dfrac{r(T_p) \left ( T_t + T_p \right ) - R(T_p) + sT_t}{\left ( T_t + T_p \right )^2} \quad \text{(by definition)}
\end{align}
$

Setting this to zero, we get:

$
\begin{align}
\quad 0 
&= \dfrac{r(T^*_p) \left ( T_t + T^*_p \right ) - R(T^*_p) + sT_t}{\left ( T_t + T^*_p \right )^2} \\
&= r(T^*_p) \left ( T_t + T^*_p \right ) - R(T^*_p) + sT_t \\
&\Rightarrow r(T^*_p) = \dfrac{R(T^*_p) - sT_t}{T_t + T^*_p}
\end{align}
$

arriving at the same equation as above.

### Additional rewards and costs
Additional travel time can be modeled as beneficial rest.

## Parameter tool
Let's rewrite the code to 1) better display the data and 2) be a single code block to visualize any parameter manipulation.

There are five parameters ($T_p, T_t, R_0, r_0, \tau$). One of them is left unknown and solved for, so a heatmap can display three at a time: two known parameters on the axes and the unknown as the color. With four known parameters to choose the axes from, there are $C^4_2 = \frac{4!}{2! 2!} = 6$ possible heatmaps. Instead of displaying all graphs at once, we allow the user to select which variable(s) to plot using either a 1D (curve) or 2D (heatmap) visualization. Additionally, we implement a selection tool for each fixed variable (i.e. not displayed) in order to quickly update the plots.

In [None]:
# Behavior parameters (patch residence time and travel time)
t_p = np.linspace(5.0, 60.0, num=56)
t_t = np.linspace(5.0, 30.0, num=26)

# Environment parameters
# Remember, R_0 / r_0 must be ≥ t_t, otherwise R_0 will default to zero!
R_0 = np.array([0.0])
r_0 = np.linspace(0.5, 5.0, num=19)
tau = None  # the parameter left as None is the unknown

# Parameters (do not change order!!!)
params = dict(t_p=t_p, t_t=t_t, R_0=R_0, r_0=r_0, tau=tau)

# Exactly one parameter may be left unknown
n_unknown = sum(1 for value in params.values() if value is None)
assert n_unknown == 1

In [None]:
# Known parameters, in declaration order
X = {key: grid for key, grid in params.items() if grid is not None}

# The parameter left as None is the one being solved for
unknown_names = [key for key, grid in params.items() if grid is None]
name = unknown_names[0]

# Solve for the unknown parameter and the associated R_opt
solver_options = dict(return_solvable=True, min_value=0.01, max_value=1000.0)
soln, R_opt, is_solvable = helper.get_optimal_values(**params, **solver_options)

# Quantities that the plotting cells display
Y = {name: soln, 'R_opt': R_opt}

In [None]:
#%matplotlib widget # calling the magic function again closes previous figures

# Parameter settings
x_name = 't_p'

# Create widgets: one slider for every known parameter except the x-axis one
w1D = {
    key: SelectionSlider(options=grid,
                         value=grid[0],
                         description=key,
                         **default_kwargs)
    for key, grid in X.items()
    if key != x_name
}

# Create initial plot (every slider parameter at its first value)
fig1D, ax1D = plt.subplots(1, 2, figsize=(8.0, 3.0))
idx = tuple(slice(None) if key == x_name else 0 for key in X)
lines = []
for axis, (label, values) in zip(ax1D, Y.items()):
    curve, = axis.plot(X[x_name], values[idx], color='black', alpha=0.7)
    lines.append(curve)
    axis.set_xlabel(x_name)
    axis.set_ylabel(label)
    axis.set_title('{} vs. {}'.format(label, x_name))
plt.tight_layout()

# Create update function
x_idx = list(X).index(x_name)
def update_plot(**kwargs):
    """Redraw both curves for the current slider positions.

    Each keyword value is the ``(value, index)`` pair returned by a
    SelectionSlider; only the index is used.
    """
    # Slider indices in keyword order, with a full slice placed at the x axis
    chosen = [pair[1] for pair in kwargs.values()]
    selector = tuple(chosen[:x_idx] + [slice(None)] + chosen[x_idx:])

    # Update curves and rescale the axes to the new data
    for axis, curve, values in zip(ax1D, lines, Y.values()):
        curve.set_ydata(values[selector])
        axis.relim()
        axis.autoscale_view()
    fig1D.canvas.draw_idle()

widgets.interact(update_plot, **w1D);

In [None]:
#%matplotlib widget # calling the magic function again closes previous figures

# Parameter settings
x1_name = 't_p' # horizontal axis of the heatmaps
x2_name = 't_t' # vertical axis of the heatmaps
Y_range = [0.0, 1000.0] # not applied to the color limits, which follow the data

# Create widgets (one slider per parameter that is not on a heatmap axis)
w2D = {} # different name from above because widgets persist in background
for name, x in X.items():
    if name not in [x1_name, x2_name]:
        w2D[name] = SelectionSlider(options=x,
                                    value=x[0],
                                    description=name,
                                    **default_kwargs)

# Create color palette
# The second argument of set_under/set_over is an alpha value in [0, 1], not a
# data threshold, so only the color is passed.
palette = copy.copy(plt.cm.coolwarm)
palette.set_under('black')
palette.set_over('black')
palette.set_bad(alpha=0.0)

# Positions of the two plotted parameters among the keys of X
# (assumes the arrays in Y have one axis per key of X, in the same order)
x1_idx = list(X.keys()).index(x1_name)
x2_idx = list(X.keys()).index(x2_name)


def _heatmap_data(y, selected=None):
    """Return the 2D slice of ``y`` with x1 along columns and x2 along rows.

    ``selected`` maps parameter name -> index for the parameters that are not
    plotted; parameters missing from it use index 0.
    """
    selected = {} if selected is None else selected
    # Build the index by name so the result does not depend on the relative
    # order of x1 and x2 or on the order in which sliders are reported.
    idx = tuple(slice(None) if key in (x1_name, x2_name) else selected.get(key, 0)
                for key in X)
    data = y[idx]
    # imshow draws rows along the vertical axis; transpose when x1 is the
    # leading axis so that it ends up on the horizontal axis as labeled.
    return data.T if x1_idx < x2_idx else data


def _color_limits(data):
    """Return (min, max) over the finite entries of ``data``, or (None, None)."""
    finite = np.ma.masked_invalid(data).compressed()
    if finite.size == 0:
        return None, None
    return finite.min(), finite.max()


# Create initial heatmaps (every slider parameter at its first value)
fig2D, ax2D = plt.subplots(1, 2, figsize=(8.0, 3.0))
images = []
cbars = []
for i, (name, y) in enumerate(Y.items()):
    # Create heatmap
    data = _heatmap_data(y)
    vmin, vmax = _color_limits(data)
    im = ax2D[i].imshow(data,
                        cmap=palette,
                        aspect='auto',
                        origin='lower',
                        vmin=vmin,
                        vmax=vmax)
    images.append(im)
    ax2D[i].set_xlabel(x1_name)
    ax2D[i].set_ylabel(x2_name)
    ax2D[i].set_title('{} vs. ({}, {})'.format(name, x1_name, x2_name))

    # Create colorbar
    cbar = fig2D.colorbar(im, ax=ax2D[i])
    cbar.set_label(name)
    cbars.append(cbar)

plt.tight_layout()


# Create update function
def update_heatmap(**kwargs):
    """Redraw both heatmaps for the current slider positions.

    Each keyword value is the ``(value, index)`` pair returned by a
    SelectionSlider; only the index is used.
    """
    selected = {key: val[1] for key, val in kwargs.items()} # val = (w.value, w.index)

    # Update heatmaps and rescale the colors to the new slice
    for i, y in enumerate(Y.values()):
        data = _heatmap_data(y, selected)
        images[i].set_data(data)
        vmin, vmax = _color_limits(data)
        cbars[i].mappable.set_clim(vmin=vmin, vmax=vmax)
        ax2D[i].relim()
        ax2D[i].autoscale_view()
    fig2D.canvas.draw_idle()

widgets.interact(update_heatmap, **w2D);

## Archive

### Fit environment to behavior
Given preferred patch residence and travel times, what is the environment in which such behavior is optimal?

In [None]:
# Behavior parameters (fixed)
t_p = 20
t_t = 10

# Environment parameters (varied)
# Remember, R_0 / r_0 must be ≥ t_t, otherwise R_0 will default to zero!
R_0 = np.array([0.0, 2.0, 6.0, 8.0, 10.0, 12.0])
r_0 = np.arange(50, 501, 25)/100

# Solve for tau, optimum cumulative reward (one row per R_0 value)
grid_shape = (R_0.shape[0], r_0.shape[0])
tau = np.zeros(grid_shape)
R_opt = np.zeros(grid_shape)
# Builtin bool: the np.bool alias is deprecated since NumPy 1.20 and absent
# from NumPy 1.24-1.26.
is_solvable = np.ones(grid_shape, dtype=bool)
# NOTE(review): get_optimal_values is only reachable through the imported
# helper module. This archived call omits the unknown (tau) and expects one
# row of length len(r_0) per call — confirm against the current helper API.
for i, R_0_ in enumerate(R_0):
    tau[i, :], R_opt[i, :], is_solvable[i, :] = helper.get_optimal_values(t_p=t_p,
                                                                          t_t=t_t,
                                                                          R_0=R_0_,
                                                                          r_0=r_0,
                                                                          return_solvable=True)

In [None]:
fig, ax = plt.subplots(tau.shape[0], 2, figsize=(10, 4*tau.shape[0]))

# One row per R_0 value: tau in the left column, R_opt in the right column
panels = [(tau, 'tau (s)'), (R_opt, 'R_opt (uL)')]
for i in range(tau.shape[0]):
    ok = is_solvable[i]
    not_ok = np.invert(ok)
    heading = 't_p=%d, t_t=%d, R_0=%d' % (t_p, t_t, R_0[i])
    for j, (values, ylabel) in enumerate(panels):
        # Solid line where is_solvable is True, dashed line elsewhere
        ax[i, j].plot(r_0[ok], values[i, ok], linestyle='-', color='gray')
        ax[i, j].plot(r_0[not_ok], values[i, not_ok], linestyle='--', color='gray')
        ax[i, j].set_xlabel('r_0 (uL/s)')
        ax[i, j].set_ylabel(ylabel)
        ax[i, j].set_title(heading)
    ax[i, 0].set_yscale('log')

ax[0, 0].set_ylim([1.0, 100.0])
plt.tight_layout()

### Fit behavior to environment
Given environmental parameters, what is optimum behavior?

In [None]:
# Behavior parameters
t_t = 10

# Environment parameters
# Remember, R_0 / r_0 must be ≥ t_t, otherwise R_0 will default to zero!
R_0 = np.array([0.0])
r_0 = np.arange(50, 501, 50)/100
tau = np.arange(10, 60, 5)

# Solve for residence time, optimum cumulative reward (one row per tau value)
grid_shape = (tau.shape[0], r_0.shape[0])
t_p = np.zeros(grid_shape)
R_opt = np.zeros(grid_shape)
# Builtin bool: the np.bool alias is deprecated since NumPy 1.20 and absent
# from NumPy 1.24-1.26.
is_solvable = np.ones(grid_shape, dtype=bool)
# NOTE(review): get_optimal_values is only reachable through the imported
# helper module. This archived call omits the unknown (t_p) and expects one
# row of length len(r_0) per call — confirm against the current helper API.
for i, tau_ in enumerate(tau):
    t_p[i, :], R_opt[i, :], is_solvable[i, :] = helper.get_optimal_values(t_t=t_t,
                                                                          R_0=R_0,
                                                                          r_0=r_0,
                                                                          tau=tau_,
                                                                          return_solvable=True)

In [None]:
fig, ax = plt.subplots(t_p.shape[0], 2, figsize=(10, 20))

# One row per tau value: t_p in the left column, R_opt in the right column
panels = [(t_p, 't_p (s)'), (R_opt, 'R_opt (uL)')]
for i in range(t_p.shape[0]):
    ok = is_solvable[i]
    not_ok = np.invert(ok)
    # R_0 is a one-element array; index it so '%d' receives a scalar
    # (formatting an array relies on a deprecated array-to-scalar conversion).
    heading = 'tau=%d, t_t=%d, R_0=%d' % (tau[i], t_t, R_0[0])
    for j, (values, ylabel) in enumerate(panels):
        # Solid line where is_solvable is True, dashed line elsewhere
        ax[i, j].plot(r_0[ok], values[i, ok], linestyle='-', color='gray')
        ax[i, j].plot(r_0[not_ok], values[i, not_ok], linestyle='--', color='gray')
        ax[i, j].set_xlabel('r_0 (uL/s)')
        ax[i, j].set_ylabel(ylabel)
        ax[i, j].set_title(heading)

#ax[0, 0].set_ylim([0.0, 100.0])
plt.tight_layout()

In [None]:
# vary travel time and solve for residence time; see what values double residence time
# Behavior parameters
t_t = np.arange(5, 41, 5)

# Environment parameters
# Remember, R_0 / r_0 must be ≥ t_t, otherwise R_0 will default to zero!
R_0 = np.array([0.0])
r_0 = np.arange(50, 501, 50)/100
tau = np.array([24.0])

# Solve for residence time, optimum cumulative reward (one row per t_t value)
grid_shape = (t_t.shape[0], r_0.shape[0])
t_p = np.zeros(grid_shape)
R_opt = np.zeros(grid_shape)
# Builtin bool: the np.bool alias is deprecated since NumPy 1.20 and absent
# from NumPy 1.24-1.26.
is_solvable = np.ones(grid_shape, dtype=bool)
# NOTE(review): get_optimal_values is only reachable through the imported
# helper module. This archived call omits the unknown (t_p) and expects one
# row of length len(r_0) per call — confirm against the current helper API.
for i, t_t_ in enumerate(t_t):
    t_p[i, :], R_opt[i, :], is_solvable[i, :] = helper.get_optimal_values(t_t=t_t_,
                                                                          R_0=R_0,
                                                                          r_0=r_0,
                                                                          tau=tau,
                                                                          return_solvable=True)

In [None]:
fig, ax = plt.subplots(t_p.shape[0], 2, figsize=(10, 20))

# One row per t_t value: t_p in the left column, R_opt in the right column
panels = [(t_p, 't_p (s)'), (R_opt, 'R_opt (uL)')]
for i in range(t_p.shape[0]):
    ok = is_solvable[i]
    not_ok = np.invert(ok)
    # tau and R_0 are one-element arrays; index them so '%d' receives scalars
    # (formatting an array relies on a deprecated array-to-scalar conversion).
    heading = 'tau=%d, t_t=%d, R_0=%d' % (tau[0], t_t[i], R_0[0])
    for j, (values, ylabel) in enumerate(panels):
        # Solid line where is_solvable is True, dashed line elsewhere
        ax[i, j].plot(r_0[ok], values[i, ok], linestyle='-', color='gray')
        ax[i, j].plot(r_0[not_ok], values[i, not_ok], linestyle='--', color='gray')
        ax[i, j].set_xlabel('r_0 (uL/s)')
        ax[i, j].set_ylabel(ylabel)
        ax[i, j].set_title(heading)

#ax[0, 0].set_ylim([0.0, 100.0])
plt.tight_layout()

#### How does optimal harvest rate change with different environments?

In [None]:
# Behavior parameters
t_t = 5

# Environment parameters
# Remember, R_0 / r_0 must be ≥ t_t, otherwise R_0 will default to zero!
R_0 = np.array([0.0])
r_0 = np.arange(50, 501, 50)/100
tau = np.arange(10, 30, 1)

# Solve for residence time, optimum cumulative reward (one row per r_0 value)
grid_shape = (r_0.shape[0], tau.shape[0])
t_p = np.zeros(grid_shape)
R_opt = np.zeros(grid_shape)
# Builtin bool: the np.bool alias is deprecated since NumPy 1.20 and absent
# from NumPy 1.24-1.26.
is_solvable = np.ones(grid_shape, dtype=bool)
# NOTE(review): get_optimal_values is only reachable through the imported
# helper module. This archived call omits the unknown (t_p) and expects one
# row of length len(tau) per call — confirm against the current helper API.
for i, r_0_ in enumerate(r_0):
    t_p[i, :], R_opt[i, :], is_solvable[i, :] = helper.get_optimal_values(t_t=t_t,
                                                                          R_0=R_0,
                                                                          r_0=r_0_,
                                                                          tau=tau,
                                                                          return_solvable=True)

In [None]:
fig, ax = plt.subplots(r_0.shape[0], 2, figsize=(10, 20))

# One row per r_0 value: t_p in the left column, harvest rate in the right column
for i in range(r_0.shape[0]):
    ok = is_solvable[i]
    not_ok = np.invert(ok)
    # R_0 is a one-element array; index it so '%d' receives a scalar
    # (formatting an array relies on a deprecated array-to-scalar conversion).
    heading = 'r_0=%.2f, t_t=%d, R_0=%d' % (r_0[i], t_t, R_0[0])

    # Residence time (solid where is_solvable is True, dashed elsewhere)
    ax[i, 0].plot(tau[ok], t_p[i, ok], linestyle='-', color='gray')
    ax[i, 0].plot(tau[not_ok], t_p[i, not_ok], linestyle='--', color='gray')
    ax[i, 0].set_ylabel('t_p (s)')

    # Harvest rate: optimal reward divided by travel plus residence time
    ax[i, 1].plot(tau[ok], R_opt[i, ok]/(t_t + t_p[i, ok]),
                  linestyle='-', color='gray')
    ax[i, 1].plot(tau[not_ok], R_opt[i, not_ok]/(t_t + t_p[i, not_ok]),
                  linestyle='--', color='gray')
    ax[i, 1].set_ylabel('hr (uL/s)')

    for j in range(2):
        # tau is a time constant, so it is labeled in seconds like 'tau (s)'
        # in the earlier tau plot rather than as a rate.
        ax[i, j].set_xlabel('tau (s)')
        ax[i, j].set_title(heading)

plt.tight_layout()

In [None]:
# Look up the residence time for one (t_t, r_0) combination and print it.
# NOTE(review): this indexes t_p as (t_t, r_0) and treats t_t as an array, which
# matches the travel-time sweep cell rather than the cell directly above
# (where t_t is a scalar) — re-run that sweep first; confirm.
t_t_ = 10
R_0_ = R_0[0]
r_0_ = 2.0
# Scalar row/column indices so that t_p_ is a scalar; fancy indexing with
# argwhere rows returns a one-element array, and formatting that with '%.2f'
# relies on a deprecated array-to-scalar conversion.
row = np.flatnonzero(t_t == t_t_)[0]
col = np.flatnonzero(r_0 == r_0_)[0]
t_p_ = t_p[row, col]
tau_ = tau[0]

print('t_p = %.2f, t_t = %.2f, R_0 = %.2f, r_0 = %.2f, tau = %.2f' % (t_p_, t_t_, R_0_, r_0_, tau_))

In [None]:
# Look up the residence time for one (t_t, r_0) combination and print it.
# NOTE(review): this indexes t_p as (t_t, r_0) and treats t_t as an array, which
# matches the travel-time sweep cell rather than the harvest-rate cells
# (where t_t is a scalar) — re-run that sweep first; confirm.
t_t_ = 25
R_0_ = R_0[0]
r_0_ = 2.0
# Scalar row/column indices so that t_p_ is a scalar; fancy indexing with
# argwhere rows returns a one-element array, and formatting that with '%.2f'
# relies on a deprecated array-to-scalar conversion.
row = np.flatnonzero(t_t == t_t_)[0]
col = np.flatnonzero(r_0 == r_0_)[0]
t_p_ = t_p[row, col]
tau_ = tau[0]

print('t_p = %.2f, t_t = %.2f, R_0 = %.2f, r_0 = %.2f, tau = %.2f' % (t_p_, t_t_, R_0_, r_0_, tau_))

### Fixed plotting tool

In [None]:
# Archived static overview: a top row of curves (one per entry of X) and a
# bottom row of heatmaps (one per pair of entries).
# NOTE(review): this cell reads X_name and Y_name, which are not assigned
# anywhere in the visible notebook, and passes Y (a dict in the cells above)
# to helper.random_slice — it presumably predates the current X/Y layout;
# confirm before running.
%matplotlib inline
fig, ax = plt.subplots(2, 6, figsize=(15, 5))

k = 0  # index of the next free heatmap column in the bottom row
Y_range = [0.0, 50.0]  # color limits (vmin, vmax) for every heatmap

# Create custom colormap
# (see https://matplotlib.org/3.1.0/gallery/images_contours_and_fields/image_masked.html)
# NOTE(review): the second positional argument of set_under/set_over is an
# alpha value, not a data threshold; Y_range[1] = 50.0 is not a valid alpha —
# confirm intent.
palette = copy.copy(plt.cm.coolwarm)
palette.set_under('black', Y_range[0])
palette.set_over('black', Y_range[1])
palette.set_bad(alpha=0.0)
for i, (name, x) in enumerate(X.items()):
    # Plot parameter vs. unknown
    # (only for array parameters with more than one value; other panels are hidden)
    if isinstance(x, np.ndarray):
        if x.size > 1:
            ax[0, i].plot(x, helper.random_slice(Y, axis=i))
            # NOTE(review): x_name is the global set by the 1D plotting cell,
            # not the loop variable `name` — presumably `name` was meant; verify.
            ax[0, i].set_xlabel(x_name)
            ax[0, i].set_ylabel(Y_name)
            #ax[0, i].set_title(', '.join(['{}={}'.format(name, val) for name, val in ])
        else:
            ax[0, i].axis('off')
    else:
        ax[0, i].axis('off')
    
    # Plot heatmap with other parameters
    # (j > i visits each unordered pair of parameters once)
    for j, (x_name_, x_) in enumerate(zip(X_name, X)):
        if j > i:
            h = ax[1, k].imshow(helper.random_slice(Y, axis=(i, j)),
                                cmap=palette,
                                aspect='auto',
                                origin='lower',
                                vmin=Y_range[0],
                                vmax=Y_range[1])
            ax[1, k].set_xlabel(x_name_)
            ax[1, k].set_ylabel(x_name)
            k += 1
            
# A single colorbar, attached to the last heatmap drawn
fig.colorbar(h, ax=ax[1, k-1])
plt.tight_layout()