In [None]:
import sys
sys.path.insert(0, '..')
from dhmc.dhmc_sampler import DHMCSampler
from benchmarking_util import summarize_sim_results
    # Utility functions to summarize the simulation results.

In [None]:
import numpy as np
import math
import time
import pickle as pkl
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
%matplotlib inline

### Import functions to compute the posterior of the Jolly-Seber model based on the black-kneed capsid data from Seber (1982).

In [None]:
from data_and_posterior.jolly_seber_model \
    import f, f_update
from data_and_posterior.jolly_seber_model \
    import pack_param, unpack_param, index, n_param, n_disc, n_cont

#### Test the gradient and coordinatewise update function.

In [None]:
# Start from stationary point.
# Initial values for the three parameter groups; they are combined into a
# single vector by `pack_param` and used as the starting state `theta0`.
# NOTE(review): the provenance of these numbers (presumably taken from an
# earlier run) is not shown in this notebook -- confirm.
U0 = np.array([299, 371, 375, 436, 690, 480, 404, 619, 187, 163, 196, 261, 464])
phi0 = np.array([.67, .87, .92, .54, .76, .90, .63, .95, .88, .92, .96, .95])
p0 = np.array([.28, .44, .35, .33, .21, .29, .36, .24, .35, .24, .20, .20, .13])
theta0 = pack_param(p0, phi0, U0)

In [None]:
# Build a DHMC sampler with a unit scale for every parameter and run the
# sampler's own self-checks on the gradient and on the coordinatewise update,
# both started from `theta0`.
scale = np.ones(n_param)
dhmc = DHMCSampler(f, f_update, n_disc, n_param, scale)
# The trailing semicolon suppresses the cell's output of the return value.
dhmc.test_cont_grad(theta0, sd=1, n_test=10);
# The returned values are kept so they can be inspected interactively.
_, theta, logp_diff, logp_diff_update = \
    dhmc.test_update(theta0, sd=.1, n_test=10)

### DHMC with an identity mass matrix

In [None]:
# Number of independent replicates per sampler; the seeds used are
# 0, 1, ..., n_rep - 1 (see the `range(n_rep)` in the Parallel calls).
n_rep = 8

In [None]:
# Settings for the DHMC run with an identity mass matrix.
# `n_burnin` iterations are dropped from the front of the chain and
# `n_sample` iterations are kept (see `dhmc_simulation`).
n_burnin = 10 ** 3
n_sample = 1 * 10 ** 4
# NOTE(review): `n_update` is not read by `dhmc_simulation` below.
n_update = 10
# Two-element step-size and step-count settings handed to `run_sampler`.
# Presumably each pair is a (min, max) range to randomize over -- TODO confirm
# against DHMCSampler.run_sampler.
dt = .025 * np.array([.8, 1])
nstep = [70, 85]

def dhmc_simulation(seed):
    """
    Run one DHMC replicate with the given seed and summarize it.

    Reads the module-level `dhmc`, `theta0`, `dt`, `nstep`, `n_burnin` and
    `n_sample`. The chain is started at `theta0`; the first `n_burnin`
    iterations are discarded before the summary is computed.

    Returns whatever `summarize_sim_results` returns for this replicate.
    """
    sampler_output = dhmc.run_sampler(
        theta0, dt, nstep, n_burnin, n_sample, seed=seed
    )
    samples, logp_samples, accept_prob, nfevals_per_itr, time_elapsed = sampler_output

    # Keep only the post-burn-in part of the chain.
    samples = samples[n_burnin:, :]
    logp_samples = logp_samples[n_burnin:]

    # The reported time covers burn-in as well; rescale it to the share of
    # iterations that were actually kept.
    kept_fraction = n_sample / (n_sample + n_burnin)
    time_elapsed *= kept_fraction

    return summarize_sim_results(
        samples, time_elapsed, nfevals_per_itr, n_sample, n_burnin, theta0, seed
    )

In [None]:
# Run the n_rep replicates (seed = replicate index) on 4 joblib workers and
# collect one summary per replicate.
sim_result = Parallel(n_jobs=4)(delayed(dhmc_simulation)(i) for i in range(n_rep))

In [None]:
# Persist the list of per-replicate summaries for the identity-mass-matrix run.
filename = 'jolly_seber_dhmc_simulation.pkl'
with open(filename, 'wb') as file:
    pkl.dump(sim_result, file)

### DHMC with a diagonal mass matrix

In [None]:
# Load the output of an earlier DHMC run and use the per-parameter sample
# standard deviations (normalized so that the largest is 1) as the scale.
# NOTE(review): no cell visible in this notebook writes this file, so it must
# already exist on disk before this cell is run.
# NOTE(security): pickle.load can execute arbitrary code; only load files you
# produced yourself.
filename = 'jolly_seber_dhmc_output.pkl'
with open(filename, 'rb') as file:
    mcmc_output = pkl.load(file)
scale = np.std(mcmc_output['samples'], 0)
scale /= np.max(scale)
# Rebinds the module-level `dhmc`; code that reads `dhmc` after this cell
# uses the rescaled sampler.
dhmc = DHMCSampler(f, f_update, n_disc, n_param, scale)

# Settings for the rescaled sampler: the step size is larger and the number of
# steps smaller than in the identity-mass-matrix run.
n_burnin = 10 ** 3
n_sample = 1 * 10 ** 4
# NOTE(review): `n_update` is not read by `dhmc_simulation` below.
n_update = 10
# Two-element settings handed to `run_sampler`; presumably (min, max) ranges
# -- TODO confirm against DHMCSampler.run_sampler.
dt = .175 * np.array([.8, 1])
nstep = [40, 50]

def dhmc_simulation(seed):
    """
    Run one replicate of the rescaled DHMC sampler and summarize it.

    Same procedure as the identity-mass-matrix replicate: the chain starts at
    the module-level `theta0`, runs for `n_burnin + n_sample` iterations with
    the module-level `dhmc`, `dt` and `nstep`, and the burn-in is discarded.

    Returns the result of `summarize_sim_results` for this seed.
    """
    samples, logp_samples, accept_prob, nfevals_per_itr, time_elapsed = \
        dhmc.run_sampler(theta0, dt, nstep, n_burnin, n_sample, seed=seed)

    # Discard the burn-in iterations.
    samples = samples[n_burnin:, :]
    logp_samples = logp_samples[n_burnin:]

    # Charge only the post-burn-in share of the run time to the samples.
    n_total = n_sample + n_burnin
    time_elapsed *= n_sample / n_total

    summary = summarize_sim_results(
        samples, time_elapsed, nfevals_per_itr, n_sample, n_burnin, theta0, seed
    )
    return summary

In [None]:
# Run the n_rep replicates of the rescaled DHMC sampler on 4 joblib workers.
sim_result = Parallel(n_jobs=4)(delayed(dhmc_simulation)(i) for i in range(n_rep))

In [None]:
# Persist the per-replicate summaries of the rescaled DHMC run.
filename = 'jolly_seber_adap_dhmc_simulation.pkl'
with open(filename, 'wb') as file:
    pkl.dump(sim_result, file)

### Gibbs + NUTS sampler for comparison.

In [None]:
from data_and_posterior.jolly_seber_model import update_disc
from other_samplers.nuts_sampler import nuts

In [None]:
# Replace the initial state with constant values per parameter group, sized
# from the `index` lookup.
# NOTE(review): this rebinds the module-level `theta0`, `phi0`, `p0` and `U0`.
# Any earlier cell that reads `theta0` (e.g. the DHMC simulations) will pick
# up this new starting point if it is re-run after this cell.
phi0 = .8 * np.ones(len(index["phi"]))
p0 = .15 * np.ones(len(index["p"]))
U0 = 500 * np.ones(len(index["U"]))
theta0 = pack_param(p0, phi0, U0)

In [None]:
def nuts_gibbs(f, theta, dt, logp, grad, max_depth):
    """
    Perform one Gibbs sweep: a NUTS update of the first `n_cont` entries of
    `theta`, followed by `update_disc` applied to the full vector.

    Params
    ------
    f : callable
        Called as `f(theta)`; returns a triple whose first two entries are
        used here as the log-density and its gradient.
    theta : numpy array
        Current state. It is copied on entry, so the caller's array is left
        unchanged.
    dt : sequence of two floats
        The NUTS step size is drawn uniformly from [dt[0], dt[1]).
    logp, grad :
        Forwarded unchanged to `nuts` as its initial log-density and gradient
        arguments.
    max_depth : int
        Forwarded to `nuts`.

    Returns
    -------
    theta, logp, grad, nuts_accept_prob, nfevals
        The updated state, the first two outputs of `f` at that state (with
        the gradient truncated to its first `n_cont` entries), the acceptance
        statistic reported by `nuts`, and the evaluation count reported by
        `nuts` plus one for the extra call to `f` made here.
    """
    # Work on a private copy: the continuous block is overwritten below and
    # the caller's array must not change as a side effect of this call.
    theta = theta.copy()

    def f_cond(theta_cont):
        # Target for the continuous block, with theta[n_cont:] held fixed.
        logp, grad, _ = f(np.concatenate((theta_cont, theta[n_cont:])))
        # NOTE(review): a gradient containing NaN is passed through unsliced;
        # presumably `nuts` discards such proposals -- confirm.
        if not np.any(np.isnan(grad)):
            grad = grad[:n_cont]
        return logp, grad

    stepsize = np.random.uniform(dt[0], dt[1])
    theta_cont, logp, grad, nuts_accept_prob, nfevals = \
        nuts(f_cond, stepsize, theta[:n_cont], logp, grad, max_depth, warnings=False)
    theta[:n_cont] = theta_cont
    theta = update_disc(theta)

    # Refresh the log-density and gradient at the new state so that they can
    # be handed to the next sweep.
    logp, grad, _ = f(theta)
    grad = grad[:n_cont]
    nfevals += 1
    return theta, logp, grad, nuts_accept_prob, nfevals

In [None]:
# Settings for the Gibbs + NUTS comparison run.
n_burnin = 10 ** 3
n_sample = 1 * 10 ** 4
# Number of progress messages printed over the whole run
# (see `n_per_update` in `nuts_gibbs_simulation`).
n_update = 1
dt = .025 * np.array([.8, 1]) # Same as DHMC.

# Run Gibbs with NUTS update for continuous variable.
def nuts_gibbs_simulation(seed):
    """
    Run one replicate of the Gibbs sampler with a NUTS update for the
    continuous block, then summarize the post-burn-in samples.

    Seeds NumPy's global generator with `seed`, starts from a copy of the
    module-level `theta0` and runs `n_sample + n_burnin` sweeps of
    `nuts_gibbs`. Returns the result of `summarize_sim_results`.
    """
    np.random.seed(seed)

    n_iter = n_sample + n_burnin
    report_every = math.ceil(n_iter / n_update)

    # Storage for the whole chain, burn-in included.
    state = theta0.copy()
    samples = np.zeros((n_iter, len(state)))
    logp_samples = np.zeros(n_iter)
    accept_prob = np.zeros(n_iter)
    fevals_so_far = 0

    # Timed section: initial evaluation plus all sweeps.
    clock_start = time.process_time()
    logp, grad, _ = f(state)
    grad = grad[:n_cont]
    for itr in range(n_iter):
        state, logp, grad, accept_prob[itr], fevals_this_itr = \
            nuts_gibbs(f, state, dt, logp, grad, max_depth=8)
        fevals_so_far += fevals_this_itr + 1
        samples[itr, :] = state
        logp_samples[itr] = logp
        n_done = itr + 1
        if n_done % report_every == 0:
            print('{:d} iterations have been completed.'.format(n_done))
    time_elapsed = time.process_time() - clock_start

    # Charge only the post-burn-in share of the run time to the samples.
    time_elapsed *= n_sample / n_iter
    nfevals_per_itr = fevals_so_far / n_iter
    print('Each iteration required {:.2f} likelihood evaluations on average.'.format(nfevals_per_itr))

    # Drop the burn-in iterations.
    samples = samples[n_burnin:, :]
    logp_samples = logp_samples[n_burnin:]

    return summarize_sim_results(
        samples, time_elapsed, nfevals_per_itr, n_sample, n_burnin, theta0, seed
    )

In [None]:
# Run the n_rep Gibbs + NUTS replicates (seed = replicate index) on 4 joblib workers.
sim_result = Parallel(n_jobs=4)(delayed(nuts_gibbs_simulation)(i) for i in range(n_rep))

In [None]:
# Persist the per-replicate summaries of the Gibbs + NUTS run.
filename = 'jolly_seber_gibbs_simulation.pkl'
with open(filename, 'wb') as file:
    pkl.dump(sim_result, file)

### Try an M-H sampler with an optimal proposal covariance.

In [None]:
from other_samplers.adaptive_metropolis import adap_RWMH, RWMH

In [None]:
def f_logp(theta):
    """Return the first output of `f(theta, req_grad=False)`, i.e. the log-density only."""
    log_density, _unused_grad, _unused_aux = f(theta, req_grad=False)
    return log_density

# Settings for the Metropolis-Hastings comparison. The first three values are
# passed to `adap_RWMH` and their sum is reported as the burn-in length in
# `mh_simulation`.
n_warmup = 10 ** 4
n_cov_adap = 10 ** 4
n_adap_mcmc = 5 * 10 ** 5
# Number of samples drawn by the fixed-covariance `RWMH` run.
# NOTE(review): this rebinds the module-level `n_sample` used by earlier cells.
n_sample = 5 * 10 ** 5

def mh_simulation(seed):
    """
    Run one replicate of random-walk Metropolis-Hastings and summarize it.

    An adaptive run (`adap_RWMH`) started at the module-level `theta0` is used
    to estimate the proposal covariance. A second, timed run (`RWMH`) with
    that covariance held fixed then produces the `n_sample` samples that are
    summarized. The adaptive run is reported to `summarize_sim_results` as the
    burn-in.

    Returns the result of `summarize_sim_results` for this seed.
    """
    np.random.seed(seed)

    # Run adaptive MH to estimate the covariance.
    stepsize = 2.38 / math.sqrt(n_param)
    adap_samples, _ = \
        adap_RWMH(f_logp, theta0, stepsize, n_warmup, n_cov_adap, n_adap_mcmc)
    # Rows of `adap_samples` are iterations, columns are parameters.
    Sigma = np.cov(adap_samples.T)

    # Continue from the last adaptive state instead of restarting at theta0:
    # the fixed-covariance run has no warm-up of its own (4th argument is 0),
    # and the adaptive run is what gets reported as burn-in below.
    theta_init = adap_samples[-1].copy()

    # Run MH with a fixed covariance.
    tic = time.process_time() # Start clock
    samples, accept_rate, stepsize_seq, ave_stepsize_seq = \
        RWMH(f_logp, theta_init, stepsize, 0, n_sample, Sigma)

    toc = time.process_time()
    time_elapsed = toc - tic
    print('Sampling completed.')

    # Local burn-in count for the summary only; the timed run above excludes it.
    n_burnin = n_warmup + n_cov_adap + n_adap_mcmc
    # Counted as one evaluation of `f_logp` per iteration.
    nfevals_per_itr = 1
    return summarize_sim_results(samples, time_elapsed, nfevals_per_itr,
                                 n_sample, n_burnin, theta0, seed)

In [None]:
# Run the n_rep Metropolis-Hastings replicates (seed = replicate index) on 4 joblib workers.
sim_result = Parallel(n_jobs=4)(delayed(mh_simulation)(i) for i in range(n_rep))

In [None]:
# Persist the per-replicate summaries of the Metropolis-Hastings run.
filename = 'jolly_seber_mh_simulation.pkl'
with open(filename, 'wb') as file:
    pkl.dump(sim_result, file)