# Initialise the 50-dimensional standard Gaussian distribution

In [None]:
import math
import os
from time import perf_counter

import numpy as np

In [None]:
# Dimension of the target distribution.
dim = 50

# Arguments handed to Gaussian: an all-zero vector of length dim and the
# dim x dim identity matrix (presumably mean and covariance -- confirm
# against the Gaussian class definition).
zero_vector = np.zeros(dim)
identity_matrix = np.identity(dim)
Stand_Gaus_50 = Gaussian(zero_vector, identity_matrix)

# Initialise starting points

Here is how I initialise starting positions:

In [None]:
# Rows of `partial` are the unit vectors e_1, ..., e_dim.
partial = np.identity(dim)

# Row i of `another_half` is e_i + e_(i+1), wrapping around at the last row:
# the identity with its first row moved to the bottom, added to the identity.
another_half = np.concatenate((partial[1:], partial[:1]), axis=0) + partial

# Every direction is used at these six signed scales, in this order.
scales = (1, 2, 3, -1, -2, -3)

# Stack: the origin, then the scaled unit vectors, then the scaled pair sums.
# This gives 1 + 6*dim + 6*dim rows in total.
origin = np.zeros((1, dim))
scaled_units = [scale * partial for scale in scales]
scaled_pairs = [scale * another_half for scale in scales]
starting_pts = np.concatenate([origin] + scaled_units + scaled_pairs, axis=0)


# Run the BPS algorithm on the Gaussian target and save the trajectory data:

1. Turning points
2. Velocities at the turning points
3. The Markov chain's time at the turning points
4. CPU time spent
5. Number of pdf evaluations done

In [None]:
# Simulate 20 BPS chains on the target distribution and write the five
# outputs of every chain to separate .npz files under BPS_data/.
n_batch = 0
time_trajectory = 100000
lambda_ref = 1
prob_dist = Stand_Gaus_50

# np.savez does not create missing directories. Make sure the output folder
# exists up front so that a finished (expensive) run is never lost at save time.
os.makedirs('BPS_data', exist_ok=True)


for i in range(20):
  # Global index of this chain: selects the starting point and names the files.
  chain_index = 100*n_batch + i
  start_x = starting_pts[chain_index]

  # A chain started at the origin has no position to derive a direction from,
  # so it moves along the first coordinate axis; every other chain uses its
  # starting position as its initial velocity.
  if np.linalg.norm(start_x) == 0:
    start_v = np.array([1] + [0]*(dim-1))
  else:
    start_v = start_x

  turn_pts, list_of_velo, striding_times, total_evals_list, computational_times\
  = BPS_basic(x0 = start_x, v0 = start_v, Time = time_trajectory, lambda_ref = lambda_ref,
              prob_dist = prob_dist)

  # Convert the returned sequences to arrays before saving.
  turning_points = np.array(turn_pts)
  v_list = np.array(list_of_velo)
  stride_times = np.array(striding_times)
  evaluations_list = np.array(total_evals_list)
  CPUtime_list = np.array(computational_times)

  # One file per quantity; the array inside each file carries the same name
  # as the file suffix.
  path_to_save_chain = 'BPS_data/BPS_chain_' + str(chain_index) + '_'
  np.savez(path_to_save_chain + 'turning_points', turning_points = turning_points)
  np.savez(path_to_save_chain + 'v_list', v_list = v_list)
  np.savez(path_to_save_chain + 'stride_times', stride_times = stride_times)
  np.savez(path_to_save_chain + 'evaluations_list', evaluations_list = evaluations_list)
  np.savez(path_to_save_chain + 'CPUtime_list', CPUtime_list = CPUtime_list)

  # Progress report: the last entry of computational_times is printed as the
  # chain's CPU time.
  print('finishes the {}th run of BPS chain    cpu_time: {} sec'.format(chain_index,
                                                                        computational_times[-1]))


print('finishes executing a whole batch of BPS chains!')


# Compute the positions and velocities of each trajectory at times 0, 100, ..., 28000 (or further).

# Save the resulting data.

In [None]:
# Resample every saved BPS trajectory on a regular time grid and store the
# resulting positions and velocities under processed_BPS_data/.
#
# plot_step_size is the spacing of that grid: positions and velocities are
# extracted at times 0, 100, 200, ... of the trajectory.

plot_step_size = 100
n_batch = 0
num_files = 20
starting_file = 0
all_chains = []

# np.savez does not create missing directories, so create the output folder
# before the loop starts.
os.makedirs('processed_BPS_data', exist_ok=True)

start_time = perf_counter()

for i in range(num_files):

  # Global index of the chain whose files are read and written in this pass.
  chain_index = 100*n_batch + starting_file + i

  # Load the three arrays needed for resampling. The context managers close
  # each .npz file as soon as its array has been read into memory.
  path_to_load_chain = 'BPS_data/BPS_chain_' + str(chain_index) + '_'
  with np.load(path_to_load_chain + 'turning_points.npz') as x_load:
    x = x_load['turning_points']
  with np.load(path_to_load_chain + 'v_list.npz') as v_load:
    v = v_load['v_list']
  with np.load(path_to_load_chain + 'stride_times.npz') as t_load:
    t = t_load['stride_times']

  # The upper bound 100000.1 makes the grid end at 100000 inclusive.
  interm_times = np.arange(0,100000.1,plot_step_size)

  x_100s, v_100s, t_100s = x_v_t_arbitrary_times(turn_pts=x, list_of_velo=v, striding_times=t, intermediate_times=interm_times)

  # Only positions and velocities are stored; the returned times are not saved.
  path_to_save_chain = 'processed_BPS_data/processed_BPS_chain_' + str(chain_index) + '_'
  np.savez(path_to_save_chain + 'x_array', x_array = np.array(x_100s))
  np.savez(path_to_save_chain + 'v_array', v_array = np.array(v_100s))

  # Elapsed wall-clock time since the start of the whole loop.
  print('finishes_processing {}th chain    cpu_time: {} sec'.format(chain_index, perf_counter() - start_time))


print('finishes processing all!')

# Compute the KL divergences across the 601 chains at every 100th iteration.

In [None]:

def KL_divergence_with_SN(mu_1, Sigma_1):
  """Return the KL divergence KL( N(0, I) || N(mu_1, Sigma_1) ).

  Evaluates

      0.5 * ( tr(Sigma_1^-1) + mu_1^T Sigma_1^-1 mu_1 - k + ln det(Sigma_1) )

  where k = len(mu_1).

  Args:
    mu_1: 1-D array of length k, the mean vector.
    Sigma_1: (k, k) array, the covariance matrix.

  Returns:
    The divergence as a floating-point scalar.

  Raises:
    numpy.linalg.LinAlgError: if Sigma_1 cannot be inverted.
    ValueError: if det(Sigma_1) is not strictly positive, so its logarithm
      is undefined.
  """
  # Invert once and reuse the result for both the trace and the quadratic form.
  precision = np.linalg.inv(Sigma_1)

  # slogdet works on the log scale, so it stays finite where det() itself
  # would overflow to inf or underflow to 0 in higher dimensions.
  sign, log_det = np.linalg.slogdet(Sigma_1)
  if sign <= 0:
    raise ValueError('Sigma_1 must have a strictly positive determinant')

  return 0.5*(np.trace(precision) + mu_1.dot(precision).dot(mu_1)
              - len(mu_1) + log_det)

dim = 50

# Storage for all processed chains, indexed as [chain, time slice, coordinate].
samples_matrix = np.zeros((601, 301, dim))

for j in range(601):

  # Read the processed positions of the j-th chain from disk.
  chain = np.load('processed_BPS_data/processed_BPS_chain_' + str(j) + '_x_array.npz')

  # Keep only the first 301 time slices; anything recorded later is dropped.
  single_sample = chain['x_array'][0:301,:]
  samples_matrix[j] = single_sample

  if j % 10 == 0:
    print('finishes loading {}th chains'.format(j))


# Empirical mean over the chains, one mean vector per time slice.
means_array = samples_matrix.mean(axis = 0)

# Sanity check on the shape of the means.
if means_array.shape != (301, dim):
  print('Code error: mean')


# Empirical covariance over the chains, one matrix per time slice.
# Swapping the first two axes puts the time slice first, so every item
# iterated below is a (chain, coordinate) matrix with one row per chain.
samples_matrix_transpose = samples_matrix.transpose(1, 0, 2)
covariance_mats_array = np.array([np.cov(time_slice, rowvar=False)
                                  for time_slice in samples_matrix_transpose])

# Sanity check on the shape of the covariances.
if covariance_mats_array.shape != (301, dim, dim):
  print('Code error: cov')


# KL divergence at each time slice, computed from the empirical mean and
# covariance of that slice.
KL_div = np.array([KL_divergence_with_SN(slice_mean, slice_cov)
                   for slice_mean, slice_cov in zip(means_array, covariance_mats_array)])


# Write the vector of KL divergences to disk.
path_to_save_KLs = 'processed_BPS_data/KL_divergences_BPS'
np.savez(path_to_save_KLs, KL_divergence = KL_div)
