# `nb01`: Build, compute, critique, repeat

![](./figures/nb01/boxloop.png)

In this course, we will approach data science problems using the scientific method. We will build models, compute results, and critique our work, repeating this process until we are satisfied with our results. This process is called **Box's loop**. Upcoming lectures will dive into each of these steps in more detail.

# Build 

In [None]:
import numpy as np

# Generative model
def simulate(v, alpha, dt=0.001, G=9.81, y0=1.5):
    """Simulate the flight of a projectile until it reaches the ground.

    The projectile starts at ``(0, y0)`` with speed ``v`` and launch angle
    ``alpha`` and is subject to gravity only. The motion is integrated with
    fixed time steps of size ``dt``; at each step the vertical velocity is
    updated first and the position is then advanced with the new velocity.

    Args:
        v: Initial speed.
        alpha: Launch angle in radians, measured from the horizontal.
        dt: Integration time step. Must be strictly positive.
        G: Gravitational acceleration. Must be strictly positive.
        y0: Initial height of the projectile.

    Returns:
        A float array of shape ``(n, 2)`` holding the successive ``(x, y)``
        positions. The first row is the starting point; integration stops
        right after the first step for which ``y > 0`` no longer holds, so
        that position is the last row. If ``y0 <= 0`` only the starting
        point is returned.

    Raises:
        ValueError: If ``dt`` or ``G`` is not strictly positive.
    """
    # A zero time step never moves the projectile, so the loop below would
    # never terminate; reject such inputs up front.
    if dt <= 0:
        raise ValueError(f"dt must be strictly positive, got {dt!r}")
    if G <= 0:
        raise ValueError(f"G must be strictly positive, got {G!r}")

    v_x = v * np.cos(alpha)
    v_y = v * np.sin(alpha)

    x, y = 0.0, y0
    trajectory = [(x, y)]

    while y > 0:
        v_y -= dt * G  # gravity only acts on the vertical velocity
        x += dt * v_x
        y += dt * v_y
        trajectory.append((x, y))

    return np.array(trajectory)


In [None]:
import matplotlib.pyplot as plt 

v, alpha = 5.0, np.pi / 4

# Simulate a single throw and draw its path in the (x, y) plane.
trajectory = simulate(v, alpha, dt=0.001)
plt.plot(*trajectory.T)
plt.show()

In [None]:
# Probabilistic model
from scipy.stats import norm, uniform

def likelihood(v, alpha):
    """Return the distribution of the observed landing position.

    The throw is simulated for the given ``v`` and ``alpha``; the x coordinate
    of the last simulated point is used as the mean of a normal distribution
    with standard deviation 0.1, returned as a frozen scipy distribution.
    """
    landing_x = simulate(v, alpha)[-1, 0]
    return norm(loc=landing_x, scale=0.1)

# Priors over the parameters. scipy's `uniform` is parameterised by
# (loc, scale) and has support [loc, loc + scale].
p_v = uniform(loc=0, scale=10.)
p_alpha = uniform(loc=0, scale=np.pi / 2)

In [None]:
# p(x | v, alpha)
v, alpha = 5.0, np.pi / 4

# Path of the throw for these fixed parameters.
trajectory = simulate(v, alpha)
plt.plot(*trajectory.T)

# Landing positions sampled from the likelihood, drawn on the same axes.
lk = likelihood(v, alpha)
xs = lk.rvs(size=10000)
plt.hist(xs, bins=30, density=True, color="orange")

plt.show()

In [None]:
# p(x)
xs = []

for _ in range(1000):
    # Draw the parameters from their priors (v first, then alpha).
    v, alpha = p_v.rvs(), p_alpha.rvs()

    # Faint overlay of the simulated path for this draw.
    trajectory = simulate(v, alpha)
    plt.plot(*trajectory.T, color="b", alpha=0.05)

    # One noisy landing position for this draw.
    x = likelihood(v, alpha).rvs()
    xs.append(x)

plt.hist(xs, bins=30, density=True, color="orange")
plt.show()

# Data

In [None]:
# Parameters used to generate the synthetic observation.
v_true, alpha_true = 5.0, np.pi / 4.

# A single landing position drawn from the likelihood at those parameters.
x_obs = likelihood(v_true, alpha_true).rvs()

In [None]:
# Path of the throw at the data-generating parameters.
trajectory = simulate(v_true, alpha_true)
plt.plot(*trajectory.T)

# Landing positions sampled at those parameters, with the observation
# marked at ground level (y = 0).
lk = likelihood(v_true, alpha_true)
xs = lk.rvs(size=10000)
plt.hist(xs, bins=30, density=True)
plt.scatter([x_obs], [0.])
plt.show()

# Compute

In [None]:
# Point-valued estimate
from scipy.optimize import minimize

def log_likelihood(theta, x):
    """Return the log-density of ``x`` under the likelihood at ``theta``.

    ``theta`` is unpacked as ``(v, alpha)`` and passed to ``likelihood``.
    """
    v, alpha = theta
    return likelihood(v, alpha).logpdf(x)

def nll(*args):
    """Negative log-likelihood, the objective handed to ``minimize``."""
    return -log_likelihood(*args)


# Start the optimiser from a random draw of the priors.
# NOTE(review): no `method` or `bounds` is given, so scipy picks its default
# solver and the search is not restricted to the prior support -- confirm
# this is intended.
initial = np.array([p_v.rvs(), p_alpha.rvs()])
soln = minimize(nll, initial, args=(x_obs,))
soln

In [None]:
# Parameters found by the optimiser.
v_sol, alpha_sol = soln.x

# Path of the throw at the point estimate, with the observation marked at
# ground level (y = 0).
trajectory = simulate(v_sol, alpha_sol)
plt.plot(*trajectory.T)
plt.scatter([x_obs], [0.])
plt.show()

In [None]:
# Uncertainty estimation
def log_prior(theta):
    """Return the log-density of ``theta = (v, alpha)`` under the priors.

    The result is the sum of the log-densities of ``v`` under ``p_v`` and of
    ``alpha`` under ``p_alpha``.
    """
    v, alpha = theta
    log_p_v = p_v.logpdf(v)
    log_p_alpha = p_alpha.logpdf(alpha)
    return log_p_v + log_p_alpha

def log_posterior(theta, x):
    """Return the unnormalised log-posterior of ``theta`` given ``x``.

    This is the log-prior plus the log-likelihood. When the log-prior is not
    finite, ``-inf`` is returned without evaluating the likelihood, so the
    simulator is not run for such parameters.
    """
    lp = log_prior(theta)
    if np.isfinite(lp):
        return lp + log_likelihood(theta, x)
    return -np.inf

In [None]:
import emcee
# Initialise 10 walkers as Gaussian perturbations (standard deviation 0.1)
# of the optimiser's solution.
nwalkers = 10
ndim = len(soln.x)
pos = soln.x + 0.1 * np.random.randn(nwalkers, ndim)

# Sample log_posterior(theta, x_obs) for 10000 steps.
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=(x_obs,))
sampler.run_mcmc(pos, 10000, progress=True);

In [None]:
samples = sampler.get_chain()
labels = ["v", "alpha"]
fig, axes = plt.subplots(2, figsize=(10, 7), sharex=True)

# One panel per parameter; the last axis of `samples` selects the parameter.
for i, (ax, label) in enumerate(zip(axes, labels)):
    ax.plot(samples[:, :, i], "k", alpha=0.2)
    ax.set_xlim(0, len(samples))
    ax.set_ylabel(label)

plt.show()

In [None]:
import corner
# Retrieve the chain with flat=True and draw a corner plot of it, marking
# the values v_true and alpha_true as `truths`.
# NOTE(review): no samples are discarded here -- confirm whether an initial
# burn-in portion should be dropped.
thetas = sampler.get_chain(flat=True)
fig = corner.corner(thetas, labels=labels, truths=[v_true, alpha_true])

# Critique

In [None]:
# Computational diagnostics
# Autocorrelation time of the chain as reported by the sampler
# (presumably one value per parameter -- confirm against the emcee docs).
tau = sampler.get_autocorr_time()
print(tau)

In [None]:
# Retrieve the flat chain again, this time thinned with thin=50.
# NOTE(review): 50 is hard-coded; presumably it should be chosen with the
# autocorrelation time in mind -- confirm.
thetas = sampler.get_chain(flat=True, thin=50)
thetas.shape

In [None]:
# Corner plot of the samples currently held in `thetas`, with v_true and
# alpha_true marked as `truths`.
fig = corner.corner(thetas, labels=labels, truths=[v_true, alpha_true])

In [None]:
# Posterior predictive checks
# p(x | x_obs)
xs = []

# Use a random subset of (at most) 500 posterior samples. The flattened chain
# is ordered by sampler step, so taking the first 500 rows would only look at
# the earliest part of the run instead of the whole posterior sample.
subset = np.random.permutation(len(thetas))[:500]

for v, alpha in thetas[subset]:
    # Faint overlay of the simulated path for this posterior sample.
    trajectory = simulate(v, alpha)
    plt.plot(trajectory[:, 0], trajectory[:, 1], color="b", alpha=0.05)

    # One replicated observation drawn from the likelihood at this sample.
    x = likelihood(v, alpha).rvs()
    xs.append(x)

# Compare the replicated observations with the actual one (red dot at y = 0).
plt.hist(xs, density=True, bins=30, color="orange")
plt.scatter([x_obs], [0.], c="r")
plt.savefig("box-loop-ppc.png")
plt.show()

# Improve and repeat

- What if `x_obs=15`?
- What if the height of the pitcher is unknown?
- What if there is wind? 
- What if there *might* be wind, but we do not know for sure?
- How do we evaluate and compare models?