In [1]:
import os
import random
import sys
import time
from copy import deepcopy as dc

import gym
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numba
import numpy as np
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from mpl_toolkits.mplot3d import Axes3D
from scipy.integrate import quad
from scipy.stats import geom
from scipy.stats import norm

N_a = 3                             # number of possible actions; env.step is called with the indices 0..N_a-1 (policy labels 0 "push left" and 2 "push right")
sigma = 0.1                         # noise: scale of the Gaussian pdf/cdf terms in the likelihood integrands f and f_grad
dim = 2                             # dimensionality of the space (v: R^d --> R)
obs_min = np.array([-1.2,-0.07])    # per-coordinate lower bounds used by u/diff_u to rescale a state x=(pos,speed)
obs_max = np.array([ 0.6, 0.07])    # per-coordinate upper bounds, same coordinate ordering as obs_min

In [2]:
"""

initialise KL

"""

alpha = 2   # decay exponent shared by eigenvalues_1 and eigenvalues_2
def eigenvalues_1():
    """Return the squared, rescaled coefficients of the one-dimensional modes.

    Reads the globals N_trunc and alpha. Mode i (1 <= i < N_trunc) contributes
    the same coefficient twice, at positions 2*i-2 and 2*i-1 (the cosine and
    the sine slot used by u).

    Returns:
        1-D array of length 2*N_trunc-2.
    """
    ev_1 = np.zeros(2*N_trunc-2)
    decay = [1/((i+1)**2+(i+1)**2)**alpha for i in range(1,N_trunc)]
    # even positions hold the cosine slot, odd positions the sine slot
    ev_1[0::2] = decay
    ev_1[1::2] = decay
    ev_1 = 20*ev_1*np.sqrt(2)/np.pi**2
    return ev_1**2

def eigenvalues_2():
    """Return the squared, rescaled coefficients of the two-dimensional modes.

    Reads the globals N_trunc and alpha. The mode pair (i, j) with
    1 <= i, j < N_trunc owns the four consecutive positions starting at
    4*(i+N_trunc*j)-4; positions not owned by any pair stay zero.

    Returns:
        1-D array of length 4*N_trunc**2-4.
    """
    ev_2 = np.zeros(4*N_trunc**2-4)
    for j in range(1,N_trunc):
        for i in range(1,N_trunc):
            first = 4*(i+N_trunc*j)-4
            # the four cos/sin combinations of one mode pair share a coefficient
            ev_2[first:first+4] = 1/((i+1)**2+(j+1)**2)**alpha
    ev_2 = 20*ev_2*np.sqrt(2)/np.pi**2
    return ev_2**2

def C_init():
    """Assemble the diagonal prior covariance and its element-wise square root.

    Reads the globals N_trunc, dim, ev_1 and ev_2. One-dimensional coefficients
    of coordinate j are stored in C[:, j, 0]; two-dimensional coefficients of
    the coordinate pair (j, k), k > j, are stored in C[:, j, k]. All other
    entries remain zero.

    Returns:
        (C, C_root), both of shape (4*N_trunc**2-4, dim, dim).
    """
    n_coeff = 4*N_trunc**2-4
    n_1d = 2*N_trunc-2
    C = np.zeros((n_coeff,dim,dim))

    # one-dimensional modes
    for j in range(dim):
        C[0:n_1d,j,0] = ev_1[0:n_1d]

    # two-dimensional modes, one block of four entries per mode pair
    for j in range(dim):
        for k in range(j+1,dim):
            for i_1 in range(1,N_trunc):
                for i_2 in range(1,N_trunc):
                    first = 4*(i_1+N_trunc*i_2)-4
                    C[first:first+4,j,k] = ev_2[first:first+4]

    return C,C**(1/2)

In [3]:
"""

FUNCTIONS - PRIOR

"""

def sample_prior(c=1):
    """Draw one coefficient array from the prior and scale it by c.

    Every coefficient that u uses is an independent standard normal draw
    multiplied by the matching entry of the global C_root; unused entries stay
    zero. The draws are taken one at a time, in index order, from numpy's
    global random state.

    Args:
        c: factor applied to the whole sample before it is returned.

    Returns:
        Array of shape (4*N_trunc**2-4, dim, dim).
    """
    xi = np.zeros((4*N_trunc**2-4,dim,dim))
    draw = np.random.normal

    # one-dimensional modes: indices 0 .. 2*N_trunc-3 of xi[:, j, 0]
    for j in range(dim):
        for idx in range(2*N_trunc-2):
            xi[idx,j,0] = C_root[idx,j,0]*draw()

    # two-dimensional modes: four consecutive entries per mode pair
    for j in range(dim):
        for k in range(j+1,dim):
            for i_1 in range(1,N_trunc):
                for i_2 in range(1,N_trunc):
                    first = 4*(i_1+N_trunc*i_2)-4
                    for idx in range(first,first+4):
                        xi[idx,j,k] = C_root[idx,j,k]*draw()
    return xi*c

def logprior(xi):
    """Log-density of the standardised coefficients xi/C_root under N(0, 1).

    Only the coefficients that u actually uses contribute: the one-dimensional
    entries xi[0:2*N_trunc-2, j, 0] and, for every coordinate pair k > j, the
    four entries per mode pair starting at 4*(i_1+N_trunc*i_2)-4.

    The log-density is evaluated in closed form, -z**2/2 - log(sqrt(2*pi)),
    rather than as log(pdf(z)): the pdf underflows to 0 for large |z|, which
    would turn the result into -inf.

    Args:
        xi: coefficient array of shape (4*N_trunc**2-4, dim, dim).

    Returns:
        Sum of the standard normal log-densities (float).
    """
    log_norm_const = 0.5*np.log(2*np.pi)

    # index sets of the coefficients in use
    idx_1d = np.arange(2*N_trunc-2)
    modes = np.arange(1,N_trunc)
    first = 4*(modes[:,None]+N_trunc*modes[None,:])-4
    idx_2d = (first[:,:,None]+np.arange(4)).ravel()

    total = 0.0
    for j in range(dim):
        z = xi[idx_1d,j,0]/C_root[idx_1d,j,0]
        total += np.sum(-0.5*z*z)-z.size*log_norm_const

    for j in range(dim):
        for k in range(j+1,dim):
            z = xi[idx_2d,j,k]/C_root[idx_2d,j,k]
            total += np.sum(-0.5*z*z)-z.size*log_norm_const

    return total

In [4]:
"""

FUNCTIONS - LIKELIHOOD 

"""

''' Function to integrate within likelihood '''
def f(x,v_a):
    """Integrand of the likelihood.

    Gaussian density (scale sigma) of x around v_a[0], multiplied by the
    Gaussian cdf of x around each of the remaining N_a-1 entries of v_a.

    Args:
        x: integration variable.
        v_a: values of the N_a actions, the taken action first.
    """
    density = norm._pdf((x-v_a[0])/sigma)/sigma
    for other in range(1,N_a):
        density = density*norm._cdf((x-v_a[other])/sigma)
    return density

def likelihood(xi,pair):
    """Likelihood of one observed (state, action) pair under coefficients xi.

    Every action is simulated from the observed state with the global env, the
    value function u is evaluated at each resulting state, and f is integrated
    over +-3*sigma around the value of the taken action.

    Args:
        xi: coefficient array passed on to u.
        pair: the first dim entries are the state; entry 2 is the taken action.

    Returns:
        The value of the integral (first element of quad's result).
    """
    action = int(pair[2])
    data_state = pair[0:dim]

    # where would each action take us, and what is the value there?
    successors = np.zeros((N_a,dim))
    values = np.zeros(N_a)
    for candidate in range(N_a):
        env.state = data_state
        successors[candidate,:] = env.step(candidate)[0]
        values[candidate] = u(xi,successors[candidate,:])

    # move the value of the taken action to the front
    if action==0:
        v_a = values
    else:
        v_a = np.zeros(N_a)
        v_a[0] = values[action]
        v_a[1:action+1] = values[0:action]
        v_a[action+1:] = values[action+1:]

    lower = v_a[0]-3*sigma
    upper = v_a[0]+3*sigma
    return quad(f,lower,upper,args=v_a,limit=200)[0]

def loglikelihood(xi,data):
    """Sum of log(likelihood(xi, row)) over all rows of data."""
    return sum(np.log(likelihood(xi,data[row,:])) for row in range(data.shape[0]))

''' partial derivative du/dxi '''
@numba.njit()
def diff_u(x,shape):
    """Derivative of u(xi, x) with respect to every coefficient in xi.

    u is linear in xi, so each entry is the basis function that multiplies the
    corresponding coefficient in u, evaluated at x.

    Args:
        x: point at which the basis functions are evaluated.
        shape: shape of the coefficient array (and of the result).
    """
    grad = np.zeros(shape)
    for axis in range(dim):
        width = obs_max[axis]-obs_min[axis]
        theta = np.pi*(x[axis]-obs_min[axis])/width

        # 1D terms
        scale_1d = 2/width
        for mode in range(1,N_trunc):
            grad[2*mode-2,axis,0] += scale_1d*np.cos(mode*theta)
            grad[2*mode-1,axis,0] += scale_1d*np.sin(mode*theta)

        # 2D terms, one block of four per mode pair and coordinate pair
        for other in range(axis+1,dim):
            width_other = obs_max[other]-obs_min[other]
            phi = np.pi*(x[other]-obs_min[other])/width_other
            scale_2d = 4/width/width_other
            for m_1 in range(1,N_trunc):
                cos_1 = np.cos(m_1*theta)
                sin_1 = np.sin(m_1*theta)
                for m_2 in range(1,N_trunc):
                    cos_2 = np.cos(m_2*phi)
                    sin_2 = np.sin(m_2*phi)
                    first = 4*(m_1+N_trunc*m_2)-4
                    grad[first,axis,other] += scale_2d*cos_1*cos_2
                    grad[first+1,axis,other] += scale_2d*cos_1*sin_2
                    grad[first+2,axis,other] += scale_2d*sin_1*cos_2
                    grad[first+3,axis,other] += scale_2d*sin_1*sin_2
    return grad

''' function which is integrated in likelihood gradient '''
def f_grad(t,args):
    """Integrand used by grad_ll.

    Args:
        t: integration variable.
        args: [v_a, j] with v_a the action values (taken action first) and j
            the index of the entry of v_a the derivative is taken for.
    """
    v_a, j = args[0], args[1]
    if j==0:
        z = (t-v_a[0])/sigma
        value = (t-v_a[0])/(sigma**2)*norm._pdf(z)/sigma
    else:
        wide = np.sqrt(2)*sigma
        narrow = sigma/np.sqrt(2)
        midpoint = (v_a[0]+v_a[j])/2
        value = -norm._pdf((v_a[0]-v_a[j])/wide)/wide*norm._pdf((t-midpoint)/narrow)/narrow
    # cdf factors of all remaining non-taken actions
    for other in range(1,N_a):
        if other==j:
            continue
        value = value*norm._cdf((t-v_a[other])/sigma)
    return value

''' partial derivative dl/dv '''
def grad_ll(v_a,j):
    """Integrate f_grad for entry j of v_a.

    For j == 0 the window is +-3*sigma around v_a[0]; otherwise it is
    +-3*sigma/sqrt(2) around the midpoint of v_a[0] and v_a[j].
    """
    if j==0:
        centre = v_a[0]
        half_width = 3*sigma
    else:
        centre = (v_a[0]+v_a[j])/2
        half_width = 3*(sigma/np.sqrt(2))
    return quad(f_grad,centre-half_width,centre+half_width,args=[v_a,j],limit=200)[0]

''' partial derivative dl/dxi'''
def diff_ll(xi,data):
    """Gradient of the log-likelihood with respect to the coefficients xi.

    For every data point used, the successor state and value of each action
    are computed, reordered so that the taken action comes first, and the
    per-action derivatives grad_ll/likelihood (centred to zero mean) are
    multiplied by diff_u at the matching successor state and accumulated.

    The reordered arrays are built as fresh copies for every data point. The
    earlier version aliased them to the unsorted buffers whenever the action
    was 0, so every later data point with a non-zero action was reordered from
    already-overwritten values.

    Args:
        xi: coefficient array.
        data: 2-D array; the first dim columns are the state, the last column
            is the taken action.

    Returns:
        Array with the shape of xi.
    """
    diff = np.zeros(xi.shape)
    n_points = data.shape[0]

    # Iterate through all or a subset of the data points
    if stochastic_gradients and unadjusted:
        # never ask for more points than there are (random.sample would raise)
        range_i = random.sample(range(n_points),min(10,n_points))
    else:
        range_i = range(n_points)

    for i in range_i:
        lh = likelihood(xi,data[i,:])
        data_state = data[i,0:dim]
        data_action = int(data[i,-1])

        # locations the actions would take us to, and the value function there
        x = np.zeros((N_a,dim))
        v = np.zeros(N_a)
        for j in range(N_a):
            env.state = data_state
            x[j,:] = env.step(j)[0]
            v[j] = u(xi,x[j,:])

        # taken action first, the others in their original order;
        # fancy indexing returns copies, so x and v are left untouched
        order = [data_action]+[a for a in range(N_a) if a!=data_action]
        v_a = v[order]
        x_a = x[order,:]

        # gradient with respect to each action value, centred to zero mean
        grad_a = np.array([grad_ll(v_a,j) for j in range(N_a)])/lh
        grad_a -= np.mean(grad_a)

        for j in range(N_a):
            diff += grad_a[j]*diff_u(x_a[j,:],xi.shape)

    return diff

In [5]:
"""

FUNCTIONS - VALUE FUNCTION AND POLICIES

"""

''' u(xi,x), which evaluates the function u with coefficients xi at x=(pos,speed) --- new, complicated, but fast version '''
@numba.njit()
def u(xi,x):
    """Evaluate the function with coefficients xi at the point x.

    Each coordinate is rescaled with obs_min/obs_max to an angle; the result is
    the sum of cosine/sine modes of every single coordinate plus the products
    of cosine/sine modes of every coordinate pair, weighted by the matching
    entries of xi.
    """
    total = 0
    for axis in range(dim):
        width = obs_max[axis]-obs_min[axis]
        theta = np.pi*(x[axis]-obs_min[axis])/width

        # 1D terms
        scale_1d = 2/width
        for mode in range(1,N_trunc):
            total += scale_1d*xi[2*mode-2,axis,0]*np.cos(mode*theta)
            total += scale_1d*xi[2*mode-1,axis,0]*np.sin(mode*theta)

        # 2D terms, one block of four per mode pair and coordinate pair
        for other in range(axis+1,dim):
            width_other = obs_max[other]-obs_min[other]
            phi = np.pi*(x[other]-obs_min[other])/width_other
            scale_2d = 4/width/width_other
            for m_1 in range(1,N_trunc):
                cos_1 = np.cos(m_1*theta)
                sin_1 = np.sin(m_1*theta)
                for m_2 in range(1,N_trunc):
                    cos_2 = np.cos(m_2*phi)
                    sin_2 = np.sin(m_2*phi)
                    first = 4*(m_1+N_trunc*m_2)-4
                    total += scale_2d*xi[first,axis,other]*cos_1*cos_2
                    total += scale_2d*xi[first+1,axis,other]*cos_1*sin_2
                    total += scale_2d*xi[first+2,axis,other]*sin_1*cos_2
                    total += scale_2d*xi[first+3,axis,other]*sin_1*sin_2
    return total

''' Policy from "Reinforcement Learning: Theory and {Python} Implementation" '''
def policy(position,velocity):
    """Hand-written policy: return 2 (push right) when the velocity lies
    strictly between the position-dependent bounds, otherwise 0 (push left)."""
    lower = min(-0.09 * (position + 0.25) ** 2 + 0.03,
                0.3 * (position + 0.9) ** 4 - 0.008)
    upper = -0.07 * (position + 0.38) ** 2 + 0.07
    return 2 if lower < velocity < upper else 0

In [6]:
"""

FUNCTIONS - ANALYTICS

"""
    
''' Progress bar to know how much longer one has to wait '''
def progressBar(t,value, t_max, acceptances, bar_length=40):
    """Rewrite the current stdout line with iteration count, acceptance ratio
    and a text progress bar.

    Args:
        t: progress made so far (same unit as t_max).
        value: number of iterations done; 0 is allowed and reports a ratio of 0.0.
        t_max: total amount of progress; 0 is treated as "finished".
        acceptances: number of accepted proposals so far.
        bar_length: width of the bar in characters.
    """
    # guard against an empty budget and keep the bar inside its brackets
    # when t slightly overshoots t_max
    percent = float(t) / t_max if t_max else 1.0
    percent = min(max(percent, 0.0), 1.0)
    arrow = '-' * int(round(percent * bar_length)-1) + '>'
    spaces = ' ' * (bar_length - len(arrow))
    # no iterations yet: report 0.0 instead of dividing by zero
    ratio = round(acceptances/value,3) if value else 0.0
    sys.stdout.write("\rIteration: {0}    Acceptance ratio: {1}    Percent: [{2}] {3}%  ".format(value,ratio,arrow + spaces, int(round(percent * 100))))
    sys.stdout.flush()
        
''' Plotting a value function '''    
def func_plot(xi,name):
    """Save a 3-D surface plot of u(xi, .) to name + '.png'.

    The function is evaluated on a regular grid over [-1.2, 0.6) x
    [-0.07, 0.07) with steps 0.02 and 0.002.

    Args:
        xi: coefficient array passed to u.
        name: output path without the '.png' extension.
    """
    grid_x = np.arange(-1.2,0.6,0.02)
    grid_y = np.arange(-0.07,0.07,0.002)
    X,Y = np.meshgrid(grid_x,grid_y)
    Z = np.zeros(X.shape)

    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            point = np.zeros(dim)
            point[0] = X[i,j]
            point[1] = Y[i,j]
            Z[i,j] = u(xi,point)

    fig = plt.figure()
    # fig.gca(projection='3d') stopped accepting keyword arguments in newer
    # Matplotlib releases; add_subplot creates the same single 3-D axes
    ax = fig.add_subplot(111, projection='3d')
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.RdBu,linewidth=0, antialiased=False)
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    ax.set_xlabel('x-axis')
    ax.set_ylabel('y-axis')
    ax.set_zlabel('z-axis')
    ax.view_init(elev=25, azim=-120)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    fig.savefig(name + '.png', bbox_inches='tight')
    plt.close(fig)

    
''' Plot a trajectory '''  
def trajectory_plot(xi,name):
    """Plot the sequence xi against its index and save it to name + '.png'."""
    fig, ax = plt.subplots()
    steps = np.arange(len(xi))
    ax.plot(steps,xi)
    fig.savefig(name + '.png', bbox_inches='tight')
    plt.close(fig)
    
''' Compute the autocorrelations '''    
def autocorr(x,lags):
    """Sample autocorrelation of x at each lag in lags.

    Lag 0 is reported as exactly 1; every other lag is the lagged product sum
    of the centred series divided by len(x) and by the variance of x.

    Returns:
        numpy array with one entry per lag.
    """
    centred = x-np.mean(x)
    variance = np.var(x)
    n = len(x)
    coeffs = []
    for lag in lags:
        if lag==0:
            coeffs.append(1.)
        else:
            coeffs.append(np.sum(centred[lag:]*centred[:-lag])/n/variance)
    return np.array(coeffs)

''' Calculate the Effective Sample Size, assumes algorithm already burned in '''
def ESS(logposterior,name):
    """Print the effective sample size of a trace and save its autocorrelation plot.

    Autocorrelations are taken at lags 0 .. int(0.1*N)-1 and the effective
    sample size is N/(1+2*sum of those autocorrelations). Assumes the chain has
    already burned in.

    Args:
        logposterior: sequence of log-posterior values, one per iteration.
        name: output path of the stem plot without the '.png' extension.
    """
    fig, ax = plt.subplots()
    N = len(logposterior)
    # computed once and reused for both the plot and the estimate
    acf = autocorr(logposterior, range(int(N*0.1)))
    try:
        ax.stem(acf,use_line_collection=True)
    except TypeError:
        # Matplotlib releases without this keyword raise TypeError and
        # already draw the stems as a line collection
        ax.stem(acf)
    ess = N/(1+2*sum(acf))
    print('\nEffective Sample Size:', round(ess))
    print('Samples required to generate 1 independent sample:', round(N/ess,2))
    fig.savefig(name + '.png', bbox_inches='tight')
    plt.close(fig)

In [7]:
"""

Uncertainty Quantification Initialisation

"""

# Test points and the index of the reference action at each of them.
x_test = np.load('MC_x_test.npy')
a_test = np.asarray(np.load('MC_a_test.npy'),dtype=int)

# v_test[j][i] collects the relative values of action i at test point j.
v_test = [[[] for _ in range(N_a)] for _ in range(5)]

def test_value_fn(xi):
    for j in range(5):
        ''' Evaluate value function at test points '''
        for i in range(N_a):
            v = u(xi,x_test[i,:,j])
            v_test[j][i].append(v)
        ''' substract value at optimal test point for normalisation purposes'''
        for i in range(N_a):
            if i!=a_test[j]:
                v_test[j][i][-1]=v_test[j][i][-1]-v_test[j][a_test[j]][-1]
        v_test[j][a_test[j]][-1] = 0
        
def boxplot_value_fn():
    """Save box plots of the collected relative values and reset the store.

    One group of N_a boxes is drawn per test point (groups start at positions
    0, 4, 8, 12, 16); afterwards the global v_test is replaced by a fresh,
    empty structure.
    """
    global v_test
    fig, ax = plt.subplots()
    ax.set_title('UQ of relative value function evaluation')
    for point in range(5):
        start = 4*point
        ax.boxplot(v_test[point], positions = [start,start+1,start+2])
    ax.set_xticklabels(['L','0','R']*5)

    fig.savefig('figs/MC/KL_'+str(N_trunc)+'_'+method+'_NData'+str(N_data)+'_UQ.pdf', dpi=300)
    plt.close(fig)

    v_test = [[[] for _ in range(N_a)] for _ in range(5)]

In [8]:
"""

FUNCTIONS - MCMC (pCN/pCNL)

"""

def acceptance_prop(xi_u, xi_v,data,ll_u,diff_u=False):
    """Acceptance probability for moving from xi_u to the proposal xi_v.

    Args:
        xi_u: current state.
        xi_v: proposed state.
        data: 2-D array of observations, one row per (state, action) pair.
        ll_u: log-likelihood of the current state.
        diff_u: log-likelihood gradient at xi_u (only used when method is 'pCNL').

    Returns:
        (probability, ll_v) or, for 'pCNL', (probability, ll_v, diff_v), where
        ll_v and diff_v are the log-likelihood and its gradient at xi_v.
    """
    ll_v = sum(np.log(likelihood(xi_v,data[row,:])) for row in range(data.shape[0]))
    log_ratio = -ll_u
    log_ratio += ll_v

    if method!='pCNL':
        return min(1, np.exp(log_ratio)),ll_v

    # pCNL only: extra terms from the gradient-informed proposal
    log_ratio += -np.sum((xi_v-xi_u)*diff_u)/2 - delta*np.sum((xi_u+xi_v)*diff_u)/4 + delta*np.linalg.norm(diff_u*C_root)**2/4
    diff_v = diff_ll(xi_v,data)
    log_ratio -= -np.sum((xi_u-xi_v)*diff_v)/2 - delta*np.sum((xi_v+xi_u)*diff_v)/4 + delta*np.linalg.norm(diff_v*C_root)**2/4
    return min(1, np.exp(log_ratio)),ll_v,diff_v
    
def propose(xi,diff=False):
    """Propose a new state from xi.

    For method 'pCNL' the proposal uses the log-likelihood gradient diff, the
    step size delta and the prior covariance C; for any other method it is the
    pCN step with parameter beta. Both draw one fresh prior sample.
    """
    if method!='pCNL':
        return np.sqrt(1-beta*beta)*xi+beta*sample_prior()
    return ((2-delta)*xi + 2*delta*C*diff + np.sqrt(8*delta)*sample_prior())/(2+delta)

def MCMC(xi,N_data,data,max_time):
    """Run the sampler selected by the global method for max_time seconds.

    Starting from xi, proposals are generated with propose and accepted with
    the probability returned by acceptance_prop (always accepted when the
    global unadjusted is set). Trace plots, the last sample, a plot of the
    last sample and the autocorrelation/ESS plot are written below 'figs/MC/'
    and 'np_saved/MC/'.

    Args:
        xi: initial coefficient array.
        N_data: number of data points (only used in messages and file names).
        data: 2-D array of observations handed to the likelihood functions.
        max_time: wall-clock budget in seconds; it also spaces the (up to 1000)
            samples stored when policy_compare is set.

    NOTE(review): 'CNL' is treated as a gradient method here, but propose and
    acceptance_prop only implement 'pCNL' -- confirm before using 'CNL'.
    """
    print('\nMCMC algorithm ('+method + ', N_trunc=' + str(N_trunc) + ', N_data=' + str(N_data) + ', ' + str(max_time) + ' seconds) was started: ' + str(time.ctime()))

    # common stem of every file written by this run
    run_tag = 'KL_'+str(N_trunc)+'_'+method+'_NData'+str(N_data)
    fig_stem = 'figs/MC/'+run_tag
    uses_gradient = method=='CNL' or method=='pCNL'

    acc_ratio = 0
    logposterior = []
    logp = []
    logl = []

    # Initialise likelihood and gradient
    ll = loglikelihood(xi,data)
    print('Initial loglikelihood: ',ll)
    if uses_gradient:
        diff = diff_ll(xi,data)
        func_plot(diff,fig_stem+'_diff')

    # Run MCMC
    start = time.time()
    j = 0
    it = 0
    while(time.time()-start<max_time):

        # Propose and calculate acceptance probability
        if uses_gradient:
            xi_proposal = propose(xi,diff)
            a,ll_proposal,diff_proposal = acceptance_prop(xi,xi_proposal,data,ll,diff)
        else:
            xi_proposal = propose(xi)
            a,ll_proposal = acceptance_prop(xi,xi_proposal,data,ll)

        # Accept or reject proposal
        uni = np.random.uniform()
        if uni < a or unadjusted:
            if uses_gradient:
                diff = diff_proposal
            xi = xi_proposal
            ll = ll_proposal
            acc_ratio = acc_ratio + 1

        # prior, likelihood, and posterior traces are appended
        lp = logprior(xi)
        logposterior.append(lp+ll)
        logp.append(lp)
        logl.append(ll)

        if prior_compare and j%10==0:
            # store value function evaluations for uncertainty estimates
            test_value_fn(xi)
        elif policy_compare and (time.time()-start)>it*max_time/1000 and it<1000:
            # store sample for future use; the schedule follows this run's own
            # budget (max_time), not the notebook-level t_max
            np.save('np_saved/MC/samples_policy_learning/'+run_tag+'_sampleNo'+str(it)+'.npy',xi)
            it += 1

        if (j+1)%100==0:
            progressBar(time.time()-start,j+1,max_time,acc_ratio)
        j+=1

    progressBar(max_time,j,max_time,acc_ratio)

    acc_ratio = acc_ratio/(j)
    print('\nMCMC algorithm terminated: ' + str(time.ctime()) + '. \nRuntime = ' + str(time.time()-start))
    print('Final loglikelihood: ',ll)
    print('Acceptance ratio is ',acc_ratio)

    trajectory_plot(logposterior[1:],fig_stem+'_logposterior')
    trajectory_plot(logp[1:],fig_stem+'_logprior')
    trajectory_plot(logl[1:],fig_stem+'_loglikelihood')
    if prior_compare:
        boxplot_value_fn()
    np.save('np_saved/MC/'+run_tag+'_lastSample.npy',xi)
    func_plot(xi,fig_stem+'_lastSample')

    ESS(logposterior,fig_stem+'_autocorr')

# MAIN PROGRAMMES

In [10]:
"""

MAIN PROGRAMME 1 - compare NN prior to KL prior (large number of parameters)

"""       

prior_compare = True
policy_compare = False

# set maximal runtime
t_max = 3600*10

N_trunc = 70
# u and diff_u are numba-compiled and freeze the value of N_trunc at compile
# time. Recompiling any existing specialisations picks up the value set above,
# so this cell no longer requires a kernel restart (no-op on a fresh kernel).
u.recompile()
diff_u.recompile()
ev_1 = eigenvalues_1()
ev_2 = eigenvalues_2()
C,C_root = C_init()

# Make sure the output folders exist before anything is written; otherwise the
# run only fails when the first figure or sample is saved.
os.makedirs('figs/MC', exist_ok=True)
os.makedirs('np_saved/MC', exist_ok=True)

# Sample from the prior to see what a sample looks like
xi = sample_prior()
func_plot(xi,'figs/MC/KL_'+str(N_trunc)+'_a_prior_sample')

# Create environment
env = gym.make('MountainCar-v0')
env = env.unwrapped
data = np.load('MC_data.npy')
N_data = 50

try:
    # run pCN
    method = 'pCN'
    stochastic_gradients = False
    unadjusted = False
    beta =  1/2.1
    try:
        # continue from the last sample of a previous run, if there is one
        xi = np.load('np_saved/MC/KL_'+str(N_trunc)+'_'+method+'_NData'+str(N_data)+'_lastSample.npy')
    except FileNotFoundError:
        print('Starting from close to 0')
        xi = sample_prior(0.1)
    MCMC(xi,N_data,data[0:N_data,:],t_max)
finally:
    # release the environment even if the run is interrupted or fails
    env.close()


MCMC algorithm (pCNL, N_trunc=70, N_data=50, 120 seconds) was started: Sat Jul 18 16:36:06 2020
Initial loglikelihood:  -30.015746202235338
Iteration: 283    Acceptance ratio: 0.777    Percent: [--------------------------------------->] 100%  
MCMC algorithm terminated: Sat Jul 18 16:38:08 2020. 
Runtime = 120.38629126548767
Final loglikelihood:  -19.787930200522254
Acceptance ratio is  0.7773851590106007


In [9]:
"""

MAIN PROGRAMME 2 - LEARN policy, and store mean for future use (small number of parameters)

"""    

prior_compare = False
policy_compare = True

# set maximal runtime
t_max = 3600*10

N_trunc = 7
# u and diff_u are numba-compiled and freeze the value of N_trunc at compile
# time. Recompiling any existing specialisations picks up the value set above,
# so this cell no longer requires a kernel restart (no-op on a fresh kernel).
u.recompile()
diff_u.recompile()
ev_1 = eigenvalues_1()
ev_2 = eigenvalues_2()
C,C_root = C_init()

# Make sure the output folders exist before anything is written; otherwise the
# run only fails when the first figure or sample is saved.
os.makedirs('figs/MC', exist_ok=True)
os.makedirs('np_saved/MC/samples_policy_learning', exist_ok=True)

# Sample from the prior to see what a sample looks like
xi = sample_prior()
func_plot(xi,'figs/MC/KL_'+str(N_trunc)+'_a_prior_sample')

# Create environment
env = gym.make('MountainCar-v0')
env = env.unwrapped
data = np.load('MC_data.npy')
N_data = 50

try:
    # run pCN
    method = 'pCN'
    stochastic_gradients = False
    unadjusted = False
    beta =  1/2.1
    try:
        # continue from the last sample of a previous run, if there is one
        xi = np.load('np_saved/MC/KL_'+str(N_trunc)+'_'+method+'_NData'+str(N_data)+'_lastSample.npy')
    except FileNotFoundError:
        print('Starting from close to 0')
        xi = sample_prior(0.1)
    MCMC(xi,N_data,data[0:N_data,:],t_max)

    # run pCNL
    method = 'pCNL'
    stochastic_gradients = False # if true then unadjusted needs to be true too
    unadjusted = False
    delta = 1/18
    try:
        xi = np.load('np_saved/MC/KL_'+str(N_trunc)+'_'+method+'_NData'+str(N_data)+'_lastSample.npy')
    except FileNotFoundError:
        print('\nStarting from close to 0')
        xi = sample_prior(0.1)
    MCMC(xi,N_data,data[0:N_data,:],t_max)
finally:
    # release the environment even if a run is interrupted or fails
    env.close()


MCMC algorithm (pCNL, N_trunc=7, N_data=50, 36000 seconds) was started: Sat Jul 18 22:51:54 2020
Initial loglikelihood:  -23.453563879154917
Iteration: 185040    Acceptance ratio: 0.667    Percent: [--------------------------------------->] 100%  
MCMC algorithm terminated: Sun Jul 19 08:51:56 2020. 
Runtime = 36000.08349490166
Final loglikelihood:  -24.122343331108585
Acceptance ratio is  0.6670395590142671

Effective Sample Size: 8196.0
Samples required to generate 1 independent sample: 22.58
