### Warning: Joblib (ver 0.11) seems to freeze when running this notebook on Mac.

In [None]:
import sys
sys.path.insert(0, '..')
from dhmc.dhmc_sampler import DHMCSampler
from benchmarking_util import summarize_sim_results
    # Utility functions to summarize the simulation results.

In [None]:
import numpy as np
import math 
import time
import pickle as pkl
from joblib import Parallel, delayed

### Import functions to compute the 0-1 loss posterior based on SECOM data from UCI.

In [None]:
from data_and_posterior.pac_bayes_model \
    import y, X, f, f_update

#### Initial state for MCMC.

In [None]:
# Initial MCMC state: the first coefficient is set to the empirical log-odds
# of the entries of y equal to 1, and every other coefficient starts at zero.
frac_positive = np.mean(y == 1)
intercept0 = np.log(frac_positive / (1 - frac_positive))
beta0 = np.zeros(X.shape[1])
beta0[0] = intercept0
theta0 = beta0  # Same array object as beta0, not a copy.
n_param = len(theta0)
# The sampler is told that all n_param coordinates count towards 'n_disc'.
n_disc = n_param

#### Test the gradient and updating function.

In [None]:
# Build the sampler with a unit scale for every parameter, then run its two
# self-checks starting from the initial state theta0.
scale = np.ones(n_param)
dhmc = DHMCSampler(f, f_update, n_disc, n_param, scale)
# The trailing ';' keeps the notebook from echoing the return value.
dhmc.test_cont_grad(theta0, sd=.01, n_test=10);
# NOTE(review): judging by the returned names, this presumably compares the
# log-density differences reported by f_update with differences of full
# evaluations of f -- confirm against DHMCSampler.test_update.
_, theta, logp_fdiff, logp_diff = \
    dhmc.test_update(theta0, sd=10, n_test=100)

### Run DHMC

In [None]:
# Number of independent replications per sampler; the simulations below are
# launched once for each integer in range(n_rep), which is used as the seed.
n_rep = 8

In [None]:
# Run lengths and tuning values handed to dhmc.run_sampler in dhmc_simulation.
n_burnin = 10 ** 3
n_sample = 10 ** 4
# NOTE(review): dt and nstep each hold two values; presumably these are
# (min, max) ranges the sampler draws from -- confirm in DHMCSampler.
dt = .3 * np.array([.7, 1]) 
nstep = [20, 30] 

def dhmc_simulation(seed):
    """Run one DHMC chain with the given seed and summarize it.

    Reads the module-level settings 'theta0', 'dt', 'nstep', 'n_burnin' and
    'n_sample' and the module-level sampler 'dhmc'.

    Parameters
    ----------
    seed : int
        Seed forwarded to 'dhmc.run_sampler' and to 'summarize_sim_results'.

    Returns
    -------
    Whatever 'summarize_sim_results' returns for this chain.
    """
    # The log-density trace and acceptance probabilities returned by the
    # sampler are not needed for the summary, so they are discarded here.
    samples, _, _, nfevals_per_itr, time_elapsed = \
        dhmc.run_sampler(theta0, dt, nstep, n_burnin, n_sample, seed=seed)
    # Drop the first 'n_burnin' rows before summarizing.
    samples = samples[n_burnin:, :]
    return summarize_sim_results(
        samples, time_elapsed, nfevals_per_itr, n_sample, n_burnin, theta0, seed
    )

# Run the n_rep DHMC replications as 4 parallel joblib jobs; replication i
# uses i as its seed. The result is a list with one summary per replication.
sim_result = Parallel(n_jobs=4)(delayed(dhmc_simulation)(i) for i in range(n_rep))

In [None]:
# Save the list of DHMC replication summaries to disk.
filename = 'pac_bayes_dhmc_simulation.pkl'
with open(filename, mode='wb') as out_file:
    pkl.dump(sim_result, out_file)

### Run Metropolis with optimal proposal covariance

In [None]:
from other_samplers.adaptive_metropolis import adap_RWMH, RWMH

In [None]:
def f_logp(theta):
    """Return only the first output of 'f' at 'theta', skipping the gradient.

    'f' is called with req_grad=False and its first return value (the one
    the original code names 'logp') is passed back; the rest is dropped.
    """
    outputs = f(theta, req_grad=False)
    return outputs[0]

Use an estimated covariance matrix from a long DHMC chain.

In [None]:
# Load the stored output of an earlier DHMC run. Only unpickle files you
# produced yourself: pickle can execute arbitrary code while loading.
filename = '../mcmc_output/pac_bayes_dhmc_output.pkl'
with open(filename, mode='rb') as in_file:
    mcmc_output = pkl.load(in_file)
# Proposal covariance: the empirical covariance stored with that run.
Sigma = mcmc_output['emp_cov']
# Start the Metropolis chains from the last stored DHMC sample.
theta0_rwmh = mcmc_output['samples'][-1, :]
# Step size scaled as 2.38 / sqrt(number of parameters).
stepsize = 2.38 / np.sqrt(n_param)

In [None]:
# Settings for the random-walk Metropolis runs, passed to RWMH in
# met_simulation.
n_warmup = 10 ** 6
n_sample = 10 ** 5
thin = 100 # Total of 'n_sample * thin' iterations.
# NOTE(review): met_simulation takes its own 'seed' argument, which shadows
# this module-level value inside the function; nothing visible here reads it.
seed = 1

def met_simulation(seed):
    """Run one random-walk Metropolis chain and summarize it.

    Reads the module-level 'f_logp', 'theta0_rwmh', 'stepsize', 'n_warmup',
    'n_sample', 'Sigma', 'thin', 'n_burnin' and 'theta0'.

    Parameters
    ----------
    seed : int
        Seed forwarded to 'RWMH' and to 'summarize_sim_results'.

    Returns
    -------
    Whatever 'summarize_sim_results' returns for this chain.
    """
    # Run MH with a fixed covariance.
    chain, _accept_rate, _stepsize_seq, elapsed = RWMH(
        f_logp, theta0_rwmh, stepsize, n_warmup, n_sample, Sigma, seed, thin
    )
    print('Sampling completed.')
    # NOTE(review): 'n_burnin' is not set in this cell, so the value from an
    # earlier cell is used here -- confirm that dropping these rows on top of
    # the 'n_warmup' handled inside RWMH is intended.
    chain = chain[n_burnin:, :]
    # One function evaluation is counted per iteration.
    return summarize_sim_results(
        chain, elapsed, 1, n_sample, n_burnin, theta0, seed
    )

# Run the n_rep Metropolis replications as 2 parallel joblib jobs;
# replication i uses i as its seed.
sim_result = Parallel(n_jobs=2)(delayed(met_simulation)(i) for i in range(n_rep))

In [None]:
# Save the list of Metropolis replication summaries to disk.
filename = 'pac_bayes_met_simulation.pkl'
with open(filename, mode='wb') as out_file:
    pkl.dump(sim_result, out_file)

### Run Metropolis-within-Gibbs

In [None]:
# Load the stored output of an earlier DHMC run. Only unpickle files you
# produced yourself: pickle can execute arbitrary code while loading.
filename = '../mcmc_output/pac_bayes_dhmc_output.pkl'
with open(filename, mode='rb') as in_file:
    mcmc_output = pkl.load(in_file)
# From here on 'theta0' is the last stored DHMC sample, replacing the
# initial state defined earlier in the notebook.
theta0 = mcmc_output['samples'][-1, :]
Sigma = mcmc_output['emp_cov']
# Per-coordinate scale: inverse square root of the diagonal of Sigma's
# inverse (the conditional standard deviations under a Gaussian with
# covariance Sigma).
precision = np.linalg.inv(Sigma)
cond_sd = np.diag(precision) ** -.5

In [None]:
def adap_metropolis_gibbs(theta, prop_sd, aux, n_adap, n_per_adap=10):
    """Run 'n_adap' adaptation rounds of Metropolis-within-Gibbs.

    Round k (counting from 1) calls 'adap_metropolis_gibbs_step' with an
    adaptation rate of 1 / k, so later rounds change 'prop_sd' less.

    Parameters
    ----------
    theta : current state, threaded through every round.
    prop_sd : per-coordinate proposal standard deviations.
    aux : auxiliary quantity carried along with 'theta' for 'f_update'.
    n_adap : number of adaptation rounds.
    n_per_adap : number of sweeps inside each round.

    Returns
    -------
    (theta, accept_rate, prop_sd, aux), where 'accept_rate' has one row per
    round and one column per parameter (module-level 'n_param').
    """
    accept_rate = np.zeros((n_adap, n_param))
    for k in range(n_adap):
        round_out = adap_metropolis_gibbs_step(
            theta, prop_sd, aux, (k + 1) ** -1, n_per_adap
        )
        theta, prop_sd, round_rate, aux = round_out
        accept_rate[k, :] = round_rate
    return theta, accept_rate, prop_sd, aux

def adap_metropolis_gibbs_step(theta, prop_sd, aux, adapt_rate, n_per_adap):
    """Do 'n_per_adap' sweeps, then rescale the proposal scales once.

    After the sweeps, each coordinate's 'prop_sd' is multiplied by
    exp(adapt_rate * (accept_rate - .441)): it grows where the mean
    acceptance probability exceeded .441 and shrinks where it fell short.

    'prop_sd' is rescaled in place, so the caller's array changes too.

    Returns
    -------
    (theta, prop_sd, accept_rate, aux), where 'accept_rate' is the mean
    acceptance probability per coordinate over the sweeps of this round.
    """
    accept_prob = np.zeros((n_per_adap, n_param))
    for sweep in range(n_per_adap):
        theta, sweep_prob, aux = metropolis_gibbs_step(theta, prop_sd, aux)
        accept_prob[sweep, :] = sweep_prob
    accept_rate = accept_prob.mean(axis=0)
    log_rescale = adapt_rate * (accept_rate - .441)
    prop_sd *= np.exp(log_rescale)
    return theta, prop_sd, accept_rate, aux

def metropolis_gibbs_step(theta, prop_sd, aux):
    """Do one sweep: a Metropolis update of every coordinate in index order.

    Returns
    -------
    (theta, accept_prob, aux), where 'accept_prob' holds the acceptance
    probability of each coordinate's proposal (length: module-level
    'n_param').
    """
    accept_prob = np.zeros(n_param)
    for coord in range(n_param):
        update = cond_metropolis_update(theta, coord, prop_sd, aux)
        theta, accept_prob[coord], aux = update
    return theta, accept_prob, aux

def cond_metropolis_update(theta, index, prop_sd, aux):
    """Metropolis update of the single coordinate 'theta[index]'.

    A Gaussian increment with standard deviation 'prop_sd[index]' is
    proposed; 'f_update' supplies the resulting change in log-density along
    with the auxiliary quantity that goes with the proposed state.

    Parameters
    ----------
    theta : current state. Modified IN PLACE when the proposal is accepted.
    index : position of the coordinate to update.
    prop_sd : per-coordinate proposal standard deviations.
    aux : auxiliary quantity matching the current 'theta'.

    Returns
    -------
    (theta, accept_prob, aux): the (possibly updated) state, the acceptance
    probability of this proposal, and the auxiliary quantity matching the
    returned state.
    """
    # Sample from the conditional distribution imitating the optimal
    # Metropolis proposal standard deviation.
    dtheta = prop_sd[index] * np.random.randn()
    logp_diff, aux_new = f_update(theta, dtheta, index, aux)
    # Clamp the exponent at 0 before exponentiating: this equals
    # min(1, exp(logp_diff)) but cannot raise OverflowError when
    # 'logp_diff' is very large (math.exp overflows above ~709.78).
    accept_prob = math.exp(min(0.0, logp_diff))
    if accept_prob > np.random.uniform():
        theta[index] += dtheta
        aux = aux_new
    return theta, accept_prob, aux

In [None]:
# Settings for the Metropolis-within-Gibbs runs.
# Number of adaptation rounds and number of sweeps per round.
n_adap = 2500
n_per_adap = 10
# Passed on to summarize_sim_results; met_gibbs_simulation itself does not
# drop any stored draws.
n_burnin = 0
# Number of stored states per chain, including the state after adaptation.
n_sample = 5 * 10 ** 4

def met_gibbs_simulation(seed):
    """Run one adaptive Metropolis-within-Gibbs chain and summarize it.

    The chain starts from a copy of the module-level 'theta0', adapts the
    per-coordinate proposal scales for 'n_adap' rounds, and then stores
    'n_sample' states (the post-adaptation state followed by one state per
    sweep). Only the sampling loop is timed.

    Parameters
    ----------
    seed : int
        Seed for NumPy's global random generator; also forwarded to
        'summarize_sim_results'.

    Returns
    -------
    Whatever 'summarize_sim_results' returns for this chain.
    """
    np.random.seed(seed)

    theta = theta0.copy()
    _, _, aux = f(theta)

    # Adapt the proposal variance for each parameter.
    prop_sd = 2.40 * cond_sd
    theta, accept_rate, prop_sd, aux \
        = adap_metropolis_gibbs(theta, prop_sd, aux, n_adap, n_per_adap)

    # Sample.
    samples = np.zeros((n_sample, n_param))
    accept_prob = np.zeros((n_sample, n_param))
    samples[0, :] = theta
    tic = time.time()
    for i in range(1, n_sample):
        # The sweep updates its 'theta' argument in place, so it must be
        # given the working state and never a view into 'samples': passing
        # 'samples[i - 1, :]' would overwrite the previously stored row.
        theta, accept_prob[i, :], aux \
            = metropolis_gibbs_step(theta, prop_sd, aux)
        samples[i, :] = theta  # Copies the values into row i.
    # TODO: change back to 'process_time()'?
    toc = time.time()
    time_elapsed = toc - tic
    print('Sampling completed.')
    nfevals_per_itr = 1
    summary = summarize_sim_results(
        samples, time_elapsed, nfevals_per_itr, n_sample, n_burnin, theta0, seed
    )
    return summary

# Run the n_rep Metropolis-within-Gibbs replications as 4 parallel joblib
# jobs; replication i uses i as its seed.
sim_result = Parallel(n_jobs=4)(delayed(met_gibbs_simulation)(i) for i in range(n_rep))

In [None]:
# Save the list of Metropolis-within-Gibbs replication summaries to disk.
filename = 'pac_bayes_met_gibbs_simulation.pkl'
with open(filename, mode='wb') as out_file:
    pkl.dump(sim_result, out_file)