# Before running the code

First, we need to install and import pints and other packages required. 

Then, we need to mount drive onto Colab and navigate to the right directories.


In [None]:
!pip install cma

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive;
# the %cd cells in this notebook use paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Change the working directory to the pints source checkout on the mounted Drive,
# so that the install step in the next cell runs from inside it.
%cd /content/drive/My\ Drive/msc_dissertation/pints-master/

In [None]:
!python setup.py install

In [None]:
import math
import operator
import os
import os.path
from time import perf_counter

import cma
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import numpy.matlib as matlib
import scipy
import scipy.stats
import tabulate

import pints
import pints.toy

In [None]:
# Change the working directory to the experiment folder; the relative data paths
# used by the cells below (e.g. 'haario_bardenet_data/', 'HMC_data/') resolve against it.
%cd /content/drive/My\ Drive/msc_dissertation/experiment_1

# Initialise starting points of MCMCs

Initialise the dim-50 standard Gaussian distribution.

In [None]:
# Dimensionality of the target distribution.
dim = 50

# Zero mean and unit scale in every coordinate, i.e. a standard Gaussian.
mean = [0 for _ in range(dim)]
# NOTE(review): this list is passed as `sigma`; whether pints reads a 1-D `sigma` as
# standard deviations or variances makes no difference here because every entry is 1.
var = [1 for _ in range(dim)]

gaus_log_pdf = pints.toy.GaussianLogPDF(sigma=var, mean=mean)

Initialise the starting points of the Markov chains. These starting points are the same across all samplers. They are chosen to lie in a region of $\mathbb{R}^{50}$ that carries considerable probability mass. The following computation shows that, under the 50-dimensional standard Gaussian, the region $[-3,3]^{50}$ has probability ~0.8735653. I am going to choose 100-600 starting points from this region.

In [None]:
# Probability that a single N(0, 1) coordinate falls in [-3, 3].
# scipy.stats is imported explicitly in the imports cell; a bare `import scipy`
# does not guarantee that the `stats` submodule is loaded.
prob_within_3sd = 2*(scipy.stats.norm.cdf(3)-0.5)

# The `dim` coordinates are independent, so the probability of the cube [-3, 3]^dim
# is the one-dimensional probability raised to the power `dim` (computed in log space).
np.exp(dim*np.log(prob_within_3sd))

0.8735653359931238

Because of limited time and computational power, only 601 starting points are used, which is not enough to cover the region $[-3.0,3.0]^{50}$ evenly. This is a limitation of the experiment. 300 of the starting points are $i \times e_j$ for $i = -3, -2, -1, 1, 2, 3$ and $j = 1, \dots, 50$, where $e_j$ are the standard unit vectors. The origin is also included. The remaining 300 starting points are $i \times (e_j + e_{j+1})$ for the same $i$ and $j$, with $e_{51}$ taken to be $e_1$.

According to Brooks et al., with good starting points no burn-in period is needed for samplers that have no adaptation-free or warm-up period.

In [None]:
# Multipliers applied to each direction vector, in the order the resulting rows are stacked.
scales = (1, 2, 3, -1, -2, -3)

# Rows of `partial` are the standard unit vectors e_1, ..., e_dim.
partial = np.identity(dim)

# First half: the origin followed by every scaled unit vector.
origin = np.array([[0]*dim])
starting_pts = np.concatenate([origin] + [s*partial for s in scales], axis=0)

# Row j of `another_half` is e_j + e_{j+1}; the rows are shifted cyclically,
# so the last row is e_dim + e_1.
shifted = np.concatenate((partial[1:], partial[:1]), axis=0)
another_half = shifted + partial

# Second half: every scaled pair-sum vector, appended after the first half.
starting_pts = np.concatenate([starting_pts] + [s*another_half for s in scales], axis=0)


# One 1-D array per starting point, convenient for indexing a single start.
starting_pts_list = list(starting_pts)

This produces 601 starting points:

In [None]:
# Display (number of starting points, dimension).
np.shape(starting_pts)

(601, 50)

# Run Haario-Bardenet on 50-dim Normal

First, this is the code that uses `pints.MCMCController` to run the experiment. `approximate_n_samples` is the estimated upper bound on the number of samples returned by ADMC in 2.5 minutes.


In [None]:
# Output location and file-name pieces for the Haario-Bardenet (ADMC) runs.
path_to_save_data = 'haario_bardenet_data/'
chain_filename = 'chain'
log_filename = 'logging'
mcmc_type = 'ADMC_'
# Together with the loop counter, n_hours selects which starting point is used (see start_idx).
n_hours = 5

# Maximum number of iterations per chain.
approximate_n_samples = 5000

# Make sure the output directory exists before the controller writes chain/log files into it.
os.makedirs(path_to_save_data, exist_ok=True)

print('start running MCMC.')
for i in range(1):
  # Index of the starting point in starting_pts_list; also used as the suffix of the output files.
  start_idx = 100*n_hours + 6*i + 59
  file_suffix = '_' + str(start_idx)

  # A single chain, started from the selected starting point.
  mcmc = pints.MCMCController(gaus_log_pdf, 1, [starting_pts_list[start_idx]], method=pints.HaarioBardenetACMC)

  mcmc.set_max_iterations(approximate_n_samples)
  mcmc.set_parallel(False)
  mcmc.set_initial_phase_iterations(1000)
  mcmc.set_chain_filename(path_to_save_data + mcmc_type + chain_filename + file_suffix)
  mcmc.set_log_to_file(filename=path_to_save_data + mcmc_type + log_filename + file_suffix, csv=True)
  mcmc.set_log_interval(iters=1000, warm_up=1100)
  mcmc.set_log_to_screen(True)

  chains = mcmc.run()

  print('finishes the {}th chain'.format(start_idx))

print('finishes 1 hour of mcmc!')


# Run HMC on 50-dim Normal

In [None]:
# Output location and file-name pieces for the HMC runs.
path_to_save_data = 'HMC_data/'
chain_filename = 'chain'
log_filename = 'logging'
mcmc_type = 'HMC_'
# Together with the loop counter, n_hours selects which starting points are used (see start_idx).
n_hours = 0

# Maximum number of iterations per chain.
approximate_n_samples = 100000

# Make sure the output directory exists before anything is written into it.
os.makedirs(path_to_save_data, exist_ok=True)

print('start running MCMC.')
for i in range(20):
  # Index of the starting point in starting_pts_list; also used as the suffix of the output files.
  start_idx = 100*n_hours + i

  # A single chain, started from the selected starting point.
  mcmc = pints.MCMCController(gaus_log_pdf, 1, [starting_pts_list[start_idx]],
                              method=pints.HamiltonianMCMC)
  mcmc.set_max_iterations(approximate_n_samples)
  mcmc.set_parallel(False)
  mcmc.set_chain_filename(path_to_save_data + mcmc_type + chain_filename + '_' + str(start_idx))
  mcmc.set_log_to_file(filename=path_to_save_data + mcmc_type + log_filename + '_' +
                       str(start_idx), csv=True)
  mcmc.set_log_interval(iters=50, warm_up=3)
  mcmc.set_log_to_screen(False)

  chains = mcmc.run()

  # Store, per sampler, the iterations that the sampler reports as divergent.
  for j, sampler in enumerate(mcmc.samplers()):
    divergent_iters = sampler.divergent_iterations()
    np.savez(path_to_save_data + mcmc_type + chain_filename + '_' + str(start_idx + j) +
             '_divergent_iters', divergent_iters = divergent_iters)
    print('chain starting points: {}     divergent_iterations: {}'.format(start_idx + j,
                                                                          divergent_iters))

  print('finishes the {}th bunch of chains'.format(start_idx))

print('finishes the execution!')

# Calculation of KL divergences at every 100 iterations (ADMC, HMC)

In [None]:

def KL_divergence_with_SN(mu_1, Sigma_1):
  """Kullback-Leibler divergence between a standard normal and N(mu_1, Sigma_1).

  Evaluates

      0.5 * (tr(Sigma_1^-1) + mu_1^T Sigma_1^-1 mu_1 - k + ln det(Sigma_1)),

  with k = len(mu_1), which is KL( N(0, I) || N(mu_1, Sigma_1) ).

  Parameters
  ----------
  mu_1 : array-like, shape (k,)
      Mean vector of the Gaussian being compared with the standard normal.
  Sigma_1 : array-like, shape (k, k)
      Covariance matrix of that Gaussian.

  Returns
  -------
  float
      The divergence value.

  Raises
  ------
  numpy.linalg.LinAlgError
      If Sigma_1 cannot be inverted.
  ValueError
      If det(Sigma_1) is not strictly positive, so its logarithm is undefined.
  """
  mu_1 = np.asarray(mu_1)
  Sigma_1 = np.asarray(Sigma_1)

  # Invert once and reuse the result for both the trace and the quadratic form.
  precision = np.linalg.inv(Sigma_1)

  # slogdet returns the sign and log|det| without forming the determinant itself,
  # which can underflow to 0 or overflow to inf as the dimension grows.
  sign, log_det = np.linalg.slogdet(Sigma_1)
  if sign <= 0:
    raise ValueError('Sigma_1 must have a strictly positive determinant')

  return 0.5*(np.trace(precision) + mu_1.dot(precision).dot(mu_1) -
              len(mu_1) + log_det)

dim = 50

# Number of stored chains; chain files are indexed 0 .. n_chains-1.
n_chains = 601

# iterations: from 0th to (num)th
num_iters_calculated = 28000
n_iters = num_iters_calculated + 1

# samples_matrix[j, t] holds the state of chain j at iteration t.
# NOTE: a (601, 28001, 50) float64 array takes roughly 6.7 GB of memory.
samples_matrix = np.zeros((n_chains, n_iters, dim))

for j in range(n_chains):

  # load the realised chains previously run
  chain = np.genfromtxt('haario_bardenet_data_SFORD/ADMC_chain_' + str(j), delimiter = ',')

  # drop the first row of the file (presumably the header line - TODO confirm)
  single_sample = chain[1:,:]

  print('Dimension of {}th chain: {}'.format(j, single_sample.shape))

  # retain iterations from 0th to (num)th
  single_sample = single_sample[0:n_iters,:]

  # Fail loudly on a short or malformed chain; a plain assignment could otherwise
  # silently broadcast (e.g. a one-row chain) across all iterations.
  if single_sample.shape != (n_iters, dim):
    raise ValueError('chain {} has shape {} after truncation, expected {}'.format(
        j, single_sample.shape, (n_iters, dim)))

  samples_matrix[j] = single_sample

  if j % 10 == 0:
    print('finishes loading {}th chains'.format(j))


# calculate empirical mean across chains, separately for every iteration
means_array = np.mean(samples_matrix, axis = 0)

# sanity check on the shape of the result
assert means_array.shape == (n_iters, dim), 'Code error: mean'


# calculate empirical covariance matrix across chains, separately for every iteration:
# after the transpose the leading axis is the iteration and each slice is (n_chains, dim)
samples_matrix_transpose = np.transpose(samples_matrix, (1,0,2))
covariance_mats_list = [np.cov(A, rowvar=False) for A in samples_matrix_transpose]
covariance_mats_array = np.array(covariance_mats_list)

# sanity check on the shape of the result
assert covariance_mats_array.shape == (n_iters, dim, dim), 'Code error: cov'


KL_div = np.zeros(n_iters)

# calculate KL divergence with standard normal of dimension `dim`, one value per iteration
for i in range(n_iters):
  KL_div[i] = KL_divergence_with_SN(means_array[i], covariance_mats_array[i])


# save the KL divergence vector
path_to_save_KLs = 'haario_bardenet_data_SFORD/KL_divergences_ADMC_' + str(num_iters_calculated)
np.savez(path_to_save_KLs, KL_divergence = KL_div)