In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import numpy.random as random
import scipy.io, scipy.interpolate
import matplotlib.pyplot as plt

## Review

Say we want to sample from some probability density $P(x)$.
There are two especially important parts of MCMC sampling:

* The **transition probability** $Q(x | y)$: given a state $x_k$, the next candidate $x_{k + 1}$ is generated by sampling from $Q(x | x_k)$.
* The **accept/reject** criterion: accept $x_{k + 1}$ with probability $\min(1, P(x_{k + 1})/P(x_k))$.

Usually, $Q(x | y)$ is a normal distribution with mean $y$.
But this choice is arbitrary and we could use anything, so long as the transitions are reversible, i.e. the proposal is symmetric: $Q(x | y) = Q(y | x)$.

**Hamiltonian Monte Carlo is a clever choice of transition probability.**

## Hamiltonian mechanics

**Hamiltonian mechanics** is a particular way of describing classical physical systems.

* **The players**: position $q$, momentum $p$, and the total energy $H(q, p)$ of the system
* **The rules**: Hamilton's equations of motion,
$$\begin{align}
\dot q & = +\frac{\partial H}{\partial p} \\
\dot p & = -\frac{\partial H}{\partial q}
\end{align}$$
* When $p = m\dot q$, and $H = $ kinetic energy + potential energy, Hamilton's equations of motion are equivalent to Newton's.

Some very important things:
* The energy $H$ is conserved along trajectories of the ODE.
* The volume in phase space is conserved.
Take a "blob" $D$ of position/momentum pairs and evolve them all for a time $t$ using Hamilton's equations; this gives a morphed blob, $D_t$.
Then $\mathrm{vol}(D) = \mathrm{vol}(D_t)$.

## Hamiltonian Monte Carlo

MCMC simulation works with any reversible transition kernel.
The idea of HMC is to augment the state $q$ with a *pseudo-momentum* variable $p$ and use Hamiltonian dynamics to update both $q$ and $p$.

## Some real data

Let's try to apply this to David's data from last week.
We're going to do a little more cleaning up to remove duplicate points.

In [None]:
# Load the layer picks and build the along-track distance coordinate.
layers = scipy.io.loadmat('layers.mat')
sx, sy = layers['psx_layers'][0], layers['psy_layers'][0]
# dx[i] is the straight-line distance between track points i and i + 1.
dx = np.sqrt(np.diff(sx)**2 + np.diff(sy)**2)

# Only the first `max_points` samples of the track are used.
max_points = 5000
x = np.hstack(([0], np.cumsum(dx)))[:max_points]
target_layer = layers['layer_14'][0][:max_points]

# Size the candidate index set from the arrays themselves, so a track with
# fewer than `max_points` samples does not produce out-of-range indices.
num_points = min(len(x), len(target_layer))

# Find any points that are duplicated or where there's data missing and remove them.
# A point counts as a repeat when it lies less than 1.0 distance units from its
# predecessor (hence the +1 shift from the dx index to the point index).
repeat_point_indices = np.where(dx < 1.0)[0] + 1
no_data_indices = np.where(np.isnan(target_layer))[0]
good_indices = set(range(num_points)) - set(repeat_point_indices) - set(no_data_indices)
# dtype=int keeps this a valid index array even if no point survives.
indices = np.array(sorted(good_indices), dtype=int)

x = x[indices]
target_layer = target_layer[indices]

In [None]:
# Load tabulated velocity and accumulation data; each comes with its own
# distance axis ('dists' and 'acc_dists' respectively).
more_data = scipy.io.loadmat('vels.mat')
# Resample both fields onto the cleaned track coordinate x. interp1d defaults to
# linear interpolation and, by default, raises if x leaves the tabulated range.
vel_interpolater = scipy.interpolate.interp1d(more_data['dists'][0], more_data['vels'][0])
velocity = vel_interpolater(x)
acc_interpolater = scipy.interpolate.interp1d(more_data['acc_dists'][0], more_data['acc'][0])
# NOTE(review): the 1.4 factor is an ad hoc inflation of the accumulation rate,
# not something derived from the data.
accumulation = acc_interpolater(x) * 1.4  # need a little cheating to help us along

In [None]:
import model

# Time discretisation handed to the forward model.
total_time = 725.0
num_steps = 40

# One constant multiplier per time step for each forcing field.
accumulation_scale = np.full(num_steps, 1.08)
velocity_scale = np.full(num_steps, 1.05)

# Forward solve; z is indexed below as z[time_level, position].
z = model.layer_depth(
    x, accumulation_scale, velocity_scale,
    accumulation, velocity, total_time, num_steps,
)

In [None]:
# Compare the observed layer with the modelled layer at the final time level.
fig, ax = plt.subplots()
ax.plot(x/1000, target_layer, color='k', label='Data')
# Use z[-1, :] so the curve shown is the same one the misfit and adjoint
# calculations compare against the data.
ax.plot(x/1000, z[-1, :], color='b', label='Model')
ax.set_xlabel("x (km)")
fig.legend()
# plt.show() takes no figure argument; passing one is misread as a flag by the
# inline backend and raises TypeError on backends where `block` is keyword-only.
plt.show()

In [None]:
# Residual between the data and the modelled layer at the last time level;
# presumably the terminal condition for the adjoint solve -- confirm in model.py.
λ_final = target_layer - z[-1, :]
# Adjoint solve driven by the forward solution z and the residual above.
λ = model.adjoint_solve(x, accumulation_scale, velocity_scale,
                        accumulation, velocity, z, λ_final, total_time, num_steps)

# Quick sanity check on the magnitude of the adjoint state.
print("Numerical range of adjoint state: ({}, {})".format(np.min(λ), np.max(λ)))

In [None]:
# Plot the first row of the adjoint state along the track.
fig, ax = plt.subplots()
ax.plot(x/1000, λ[0, :], color='k')
ax.set_xlabel("x (km)")
# plt.show() takes no figure argument; passing one is misread as a flag by the
# inline backend and raises TypeError on backends where `block` is keyword-only.
plt.show()

In [None]:
# Sensitivity with respect to the accumulation scale; it is used further down
# as the gradient that the finite-difference check of the misfit is compared to.
dJ_da = model.sensitivity_ascale(x, z, λ, accumulation)

In [None]:
# Plot the accumulation-scale sensitivity against the start time of each step.
dt = total_time / num_steps
fig, ax = plt.subplots()
ax.plot(np.arange(num_steps) * dt, dJ_da, color='k')
ax.set_xlabel("time")
ax.set_ylabel("dJ/da")
# plt.show() takes no figure argument; passing one is misread as a flag by the
# inline backend and raises TypeError on backends where `block` is keyword-only.
plt.show()

In [None]:
# Perturbation direction s * (1 - s) with s = k / num_steps: zero at k = 0,
# largest in the middle of the time window.
da = (np.arange(num_steps) / num_steps) * (1 - np.arange(num_steps) / num_steps)
# Directional derivative predicted by the adjoint: time-weighted inner product
# of the sensitivity with the perturbation.
dJ = dt * np.dot(dJ_da, da)
print("{:e}".format(dJ))

In [None]:
# Misfit of the unperturbed model: the baseline for the difference quotients.
error_0 = model.mean_square_misfit(x, z[-1, :], target_layer)
print("{:e}".format(error_0))

# One-sided finite differences along `da`, halving the step size each pass.
# Each quotient is printed next to the adjoint-based value dJ for comparison.
for k in range(20):
    δ = 2.0 ** -k
    zk = model.layer_depth(
        x, accumulation_scale + δ * da, velocity_scale,
        accumulation, velocity, total_time, num_steps,
    )
    error = model.mean_square_misfit(x, zk[-1, :], target_layer)
    print("{:e}, {:e}".format((error - error_0)/δ, dJ))

In [None]:
# Sensitivity with respect to the velocity scale; it is used further down as the
# gradient that the finite-difference check of the misfit is compared to.
dJ_du = model.sensitivity_uscale(x, z, λ, velocity)

In [None]:
# Plot the velocity-scale sensitivity against the start time of each step.
dt = total_time / num_steps
fig, ax = plt.subplots()
ax.plot(np.arange(num_steps) * dt, dJ_du, color='k')
ax.set_xlabel("time")
ax.set_ylabel("dJ/du")
# plt.show() takes no figure argument; passing one is misread as a flag by the
# inline backend and raises TypeError on backends where `block` is keyword-only.
plt.show()

In [None]:
# Uniform perturbation direction: lower the velocity scale by 0.1 at every step.
du = np.full(num_steps, -0.1)
# Directional derivative predicted by the adjoint: time-weighted inner product
# of the sensitivity with the perturbation.
dJ = dt * np.dot(dJ_du, du)
print("{:e}".format(dJ))

In [None]:
# Misfit of the unperturbed model: the baseline for the difference quotients.
error_0 = model.mean_square_misfit(x, z[-1, :], target_layer)
print("{:e}".format(error_0))

# One-sided finite differences along `du`, halving the step size each pass.
# Each quotient is printed next to the adjoint-based value dJ for comparison.
for k in range(20):
    δ = 2.0 ** -k
    zk = model.layer_depth(
        x, accumulation_scale, velocity_scale + δ * du,
        accumulation, velocity, total_time, num_steps,
    )
    error = model.mean_square_misfit(x, zk[-1, :], target_layer)
    print("{:e}, {:e}".format((error - error_0)/δ, dJ))