In [1]:
import torch 
import gym
import time
import numpy as np
import random
import sys
from scipy.integrate import quad
from scipy.stats import norm
from scipy.stats import geom
from torch import nn
from copy import deepcopy as dc

from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['text.usetex'] = True
matplotlib.use('Agg')

torch.autograd.set_detect_anomaly(False) #makes code very slow but one can find errors in backward()

N_a = 3                             # number of possible actions (actions are -1, 0, and 1)
sigma = 0.1                         # noise
dim = 2                             # dimensionality of the space (v: R^d --> R)
obs_min = np.array([-1.2,-0.07])
obs_max = np.array([ 0.6, 0.07])

In [2]:
"""

initialise NN

"""

# Hyperparameters for our network
input_size = dim
output_size = 1
n_layer = 3

In [3]:
"""

FUNCTIONS - PRIOR

""" 


''' Set sigma_w and sigma_b uniformly '''    
alpha = 0

sigma_w_sq = np.ones(n_layer+1)*1/3
sigma_b_sq = np.ones(n_layer+1)*1/3
sigma_w_sq[-1] = 1/3
sigma_b_sq[-1] = 1/3
    

''' prior samples are represented by array of coefficients '''
def sample_prior(c=1):
    w = []
    b = []
    
    ''' weights '''
    w_layer = np.random.randn(model[0].weight.shape[0],model[0].weight.shape[1])*C_root[0][0]
    w.append(c*w_layer)
    
    for l in range(1,n_layer):
        w_layer = np.random.randn(model[2*l].weight.shape[0],model[2*l].weight.shape[1])*C_root[0][l]
        w.append(c*w_layer)
     
    w_layer = np.random.randn(model[2*n_layer].weight.shape[0],model[2*n_layer].weight.shape[1])*C_root[0][-1]
    w.append(c*w_layer)
                
    ''' biases '''
    b_layer = np.random.randn(model[0].bias.shape[0])*C_root[1][0]
    b.append(c*b_layer)
    
    for l in range(1,n_layer):
        b_layer = np.random.randn(model[2*l].bias.shape[0])*C_root[1][l]
        b.append(c*b_layer)
     
    b_layer = np.random.randn(model[2*n_layer].bias.shape[0])*C_root[1][-1]
    b.append(c*b_layer)
    
    return [w,b]


''' evaluate the log_prior up to a constant '''
def logprior(xi):
    log_prior = 0
    w = xi[0]
    b = xi[1]
    
    ''' weights '''
    for l in range(n_layer+1):
        log_prior += -0.5*np.sum(w[l]**2/C[0][l])
        
    ''' biases '''
    for l in range(n_layer+1):
        log_prior += -0.5*np.sum(b[l]**2/C[1][l])
        
    return log_prior

In [4]:
"""

FUNCTIONS - LIKELIHOOD 

"""

''' Function to integrate within likelihood '''
def f(x,v_a):
    value = norm._pdf((x-v_a[0])/sigma)/sigma
    for i in range(1,N_a):
        value = value*norm._cdf((x-v_a[i])/sigma)
    return value

def likelihood(pair):
    ''' Check where agent's action would take us '''
    pos_curr = pair[0]
    speed_curr = pair[1]
    action = int(pair[2])
    
    data_state = pair[0:dim]
    x = np.zeros((N_a,dim))
    v = np.zeros(N_a)
    for j in range(N_a):
        env.state = data_state
        x[j,:] = env.step(j)[0]
        v[j] = u(x[j,:]).detach().numpy()[0]
        
    # sort v such that the first entry is the taken action
    v_a = np.zeros(N_a)
    if action!=0:
        v_a[0] = v[action]
        v_a[1:action+1] = v[0:action]
        v_a[action+1:] = v[action+1:]
    else:
        v_a = v
    
    ''' Integrate over pdf of chosen action and cdf of other actions '''
    lklhd = quad(f,v_a[0]-3*sigma,v_a[0]+3*sigma,args=v_a,limit=200)[0]
    return lklhd

def loglikelihood(data):
    loglikelihood = 0
    for j in range(data.shape[0]):
        lh = likelihood(data[j,:])
        loglikelihood += np.log(lh)
    return loglikelihood

''' function which is integrated in likelihood gradient '''
def f_grad(t,args):
    v_a = args[0]
    j = args[1]
    if j==0:
        value = (t-v_a[0])/(sigma**2)*norm._pdf((t-v_a[0])/sigma)/sigma
    else:
        value = -norm._pdf((v_a[0]-v_a[j])/(np.sqrt(2)*sigma))/(np.sqrt(2)*sigma)*norm._pdf((t-(v_a[0]+v_a[j])/2)/(sigma/np.sqrt(2)))/(sigma/np.sqrt(2))
    for i in range(1,N_a):
        if i!=j:
            value = value*norm._cdf((t-v_a[i])/sigma)
    return value

''' partial derivative dl/dv '''
def grad_ll(v_a,j):
    if j==0:
        return quad(f_grad,v_a[0]-3*sigma,v_a[0]+3*sigma,args=[v_a,j],limit=200)[0]
    else:
        return quad(f_grad,(v_a[0]+v_a[j])/2-3*(sigma/np.sqrt(2)),(v_a[0]+v_a[j])/2+3*(sigma/np.sqrt(2)),args=[v_a,j],limit=200)[0]

''' partial derivative dl/dw, dl/db'''
def diff_ll(data):
    diff_w = []
    diff_b = []
        
    x = np.zeros((N_a,dim))
    v = np.zeros(N_a)
    x_a = np.zeros((N_a,dim))
    v_a = np.zeros(N_a)
    grad_a = np.zeros(N_a)
    data_state = np.zeros(dim)
    
    ''' Iterate through all or a subset of the data points, and compute the respective gradients '''
    if stochastic_gradients and unadjusted:
        range_i = random.sample(range(data.shape[0]),10)
    else:
        range_i = range(data.shape[0])
    for i in range_i:
        lh = likelihood(data[i,:])
        data_state = data[i,0:dim]
        data_action = int(data[i,-1])
        
        ''' compute locations the actions would take us to and the values of the value function at those points '''
        for j in range(N_a):
            env.state = data_state
            x[j,:] = env.step(j)[0]
            v[j] = u(x[j,:]).detach().numpy()[0]
        
        ''' sort v and x such that the first entry is the taken action '''
        if data_action!=0:
            v_a[0] = v[data_action]
            v_a[1:data_action+1] = v[0:data_action]
            v_a[data_action+1:] = v[data_action+1:]
            x_a[0,:] = x[data_action,:]
            x_a[1:data_action+1,:] = x[0:data_action,:]
            x_a[data_action+1:,:] = x[data_action+1:,:]
        else:
            v_a = v
            x_a = x
            
        ''' Calculate gradient at the v_i the action i would give us '''
        for j in range(N_a):
            grad_a[j] = grad_ll(v_a,j)/lh
        mean_grad = np.mean(grad_a)
        grad_a -= mean_grad
            
        for j in range(N_a):
            out = u(x_a[j,:])
            out.backward(torch.from_numpy(np.array([grad_a[j]])).float())

    for l in range(n_layer+1):
        diff_w.append(model[2*l].weight.grad.data.clone().detach().numpy())
        diff_b.append(model[2*l].bias.grad.data.clone().detach().numpy())
        
    return [diff_w, diff_b]

In [5]:
"""

FUNCTIONS - VALUE FUNCTION AND POLICIES

"""

''' u(x), which evaluates the function u at x=(pos,speed) with the currently set xi '''
def u(x):
    x_transformed = np.zeros(dim)
    for i in range(dim):
        x_transformed[i] = ((x[i]-obs_min[i])/(obs_max[i]-obs_min[i])-1/2)*2
        
    value = model(torch.from_numpy(x_transformed).float())
    return value

''' Policy from "Reinforcement Learning: Theory and {Python} Implementation" '''
def policy(position,velocity):
        lb = min(-0.09 * (position + 0.25) ** 2 + 0.03,
                0.3 * (position + 0.9) ** 4 - 0.008)
        ub = -0.07 * (position + 0.38) ** 2 + 0.07
        if lb < velocity < ub:
            action = 2 # push right
        else:
            action = 0 # push left
        return action

In [6]:
"""

FUNCTIONS - ANALYTICS

"""
    
''' Progress bar to know how much longer one has to wait '''
def progressBar(t,value, t_max, acceptances, bar_length=40):
    percent = float(t) / t_max
    arrow = '-' * int(round(percent * bar_length)-1) + '>'
    spaces = ' ' * (bar_length - len(arrow))
    sys.stdout.write("\rIteration: {0}    Acceptance ratio: {1}    Percent: [{2}] {3}%  ".format(value,round(acceptances/value,3),arrow + spaces, int(round(percent * 100))))
    sys.stdout.flush()    
        
''' Plotting a value function '''    
def func_plot(xi,name):
    x = np.arange(-1.2,0.6,0.02)
    y = np.arange(-0.07,0.07,0.002)
    X,Y = np.meshgrid(x,y)
    Z = np.zeros(X.shape)
    
    for l in range(n_layer+1):
        model[2*l].weight = torch.nn.Parameter(torch.from_numpy(xi[0][l]).float(), requires_grad=False)
        model[2*l].bias = torch.nn.Parameter(torch.from_numpy(xi[1][l]).float(), requires_grad=False)
        
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            Z[i,j] = u((X[i,j],Y[i,j]))[0]
            
    fig = plt.figure()
    ax = fig.gca(projection='3d')
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.RdBu,linewidth=0, antialiased=False)
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    ax.set_xlabel('x-axis')
    ax.set_ylabel('y-axis')
    ax.set_zlabel('z-axis')
    ax.view_init(elev=25, azim=-120)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    fig.savefig(name + '.png', bbox_inches='tight',format='png')
    plt.close(fig)
    
''' Plot a trajectory '''  
def trajectory_plot(xi,name):
    x = np.arange(len(xi))
    fig = plt.figure()
    plt.plot(x,xi)
    fig.savefig(name + '.png', bbox_inches='tight')
    plt.close(fig)
    
''' Compute the autocorrelations '''    
def autocorr(x,lags):
    mean=np.mean(x)
    var=np.var(x)
    xp=x-mean
    corr=[1. if l==0 else np.sum(xp[l:]*xp[:-l])/len(x)/var for l in lags]
    return np.array(corr)

''' Calculate the Effective Sample Size, assumes algorithm already burned in '''
def ESS(logposterior,name):
    fig, ax = plt.subplots()
    N = len(logposterior)
    ax.stem(autocorr(logposterior, range(int(N*0.1))),use_line_collection=True) 
    ESS = N/(1+2*sum(autocorr(logposterior, range(int(N*0.1)))))
    print('Effective Sample Size:', round(ESS))
    print('Samples required to generate 1 independent sample:', round(N/ESS,2))
    fig.savefig(name + '.png', bbox_inches='tight')
    plt.close(fig)    

In [7]:
"""

Uncertainty Quantification Initialisation

"""

x_test = np.load('MC_x_test.npy')
a_test = np.asarray(np.load('MC_a_test.npy'),dtype=int)
    
v_test = [[],[],[],[],[]]
for j in range(5):
    for i in range(N_a):
        v_test[j].append([])

def test_value_fn():
    for j in range(5):
        ''' Evaluate value function at test points '''
        for i in range(N_a):
            v = u(x_test[i,:,j]).detach().numpy()[0]
            v_test[j][i].append(v)
        ''' substract value at optimal test point for normalisation purposes'''
        for i in range(N_a):
            if i!=a_test[j]:
                v_test[j][i][-1]=v_test[j][i][-1]-v_test[j][a_test[j]][-1]
        v_test[j][a_test[j]][-1] = 0
        
def boxplot_value_fn():
    global v_test
    fig, ax = plt.subplots()
    ax.set_title('UQ of relative value function evaluation')
    ax.boxplot(v_test[0], positions = [0,1,2])
    ax.boxplot(v_test[1], positions = [4,5,6])
    ax.boxplot(v_test[2], positions = [8,9,10])
    ax.boxplot(v_test[3], positions = [12,13,14])
    ax.boxplot(v_test[4], positions = [16,17,18])
    ax.set_xticklabels(['L','0','R','L','0','R','L','0','R','L','0','R','L','0','R'])

    fig.savefig('figs/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_UQ.pdf', dpi=300)
    plt.close(fig) 
    
    v_test = [[],[],[],[],[]]
    for j in range(5):
        for i in range(N_a):
            v_test[j].append([])

In [8]:
"""

FUNCTIONS - MCMC (pCN/pCNL)

"""

def acceptance_prop(xi_u, xi_v,data,ll_u,diff_u=False):
    accept_prop = -ll_u
    ll_v = 0
    
    ''' both pCN and pCNL '''
    for l in range(n_layer+1):
        model[2*l].weight = torch.nn.Parameter(torch.from_numpy(xi_v[0][l]).float(), requires_grad=True)
        model[2*l].bias = torch.nn.Parameter(torch.from_numpy(xi_v[1][l]).float(), requires_grad=True)
        
    for j in range(data.shape[0]):
        lh = likelihood(data[j,:])
        ll_v += np.log(lh)
    accept_prop += ll_v
    
    ''' unadjusted? '''
    if unadjusted:
        diff_v = diff_ll(data)
        return 1,ll_v,diff_v
    
    elif method=='pCNL':
        diff_v = diff_ll(data)
        for l in range(n_layer+1):
            accept_prop += -np.sum((xi_v[0][l]-xi_u[0][l])*diff_u[0][l])/2 - delta*np.sum((xi_u[0][l]+xi_v[0][l])*diff_u[0][l])/4 + delta*np.linalg.norm(diff_u[0][l]*C_root[0][l])**2/4
            accept_prop += -np.sum((xi_v[1][l]-xi_u[1][l])*diff_u[1][l])/2 - delta*np.sum((xi_u[1][l]+xi_v[1][l])*diff_u[1][l])/4 + delta*np.linalg.norm(diff_u[1][l]*C_root[1][l])**2/4

            accept_prop -= -np.sum((xi_u[0][l]-xi_v[0][l])*diff_v[0][l])/2 - delta*np.sum((xi_v[0][l]+xi_u[0][l])*diff_v[0][l])/4 + delta*np.linalg.norm(diff_v[0][l]*C_root[0][l])**2/4  
            accept_prop -= -np.sum((xi_u[1][l]-xi_v[1][l])*diff_v[1][l])/2 - delta*np.sum((xi_v[1][l]+xi_u[1][l])*diff_v[1][l])/4 + delta*np.linalg.norm(diff_v[1][l]*C_root[1][l])**2/4      
        return min(1, np.exp(accept_prop)),ll_v,diff_v
#     elif method=='CNL':
#         diff_v = diff_ll(data)
#         for l in range(n_layer+1):
#             accept_prop += -np.sum((xi_v[0][l]-xi_u[0][l])*diff_u[0][l])/2 - delta*np.sum((xi_u[0][l]+xi_v[0][l])*diff_u[0][l]/C[0][l])/4 + delta*np.linalg.norm(diff_u[0][l])**2/4
#             accept_prop += -np.sum((xi_v[1][l]-xi_u[1][l])*diff_u[1][l])/2 - delta*np.sum((xi_u[1][l]+xi_v[1][l])*diff_u[1][l]/C[1][l])/4 + delta*np.linalg.norm(diff_u[1][l])**2/4

#             accept_prop -= -np.sum((xi_u[0][l]-xi_v[0][l])*diff_v[0][l])/2 - delta*np.sum((xi_v[0][l]+xi_u[0][l])*diff_v[0][l]/C[0][l])/4 + delta*np.linalg.norm(diff_v[0][l])**2/4  
#             accept_prop -= -np.sum((xi_u[1][l]-xi_v[1][l])*diff_v[1][l])/2 - delta*np.sum((xi_v[1][l]+xi_u[1][l])*diff_v[1][l]/C[1][l])/4 + delta*np.linalg.norm(diff_v[1][l])**2/4      
#         return min(1, np.exp(accept_prop)),ll_v,diff_v
    else:
        return min(1, np.exp(accept_prop)),ll_v
    
def propose(xi,diff=False):
    w = xi[0]
    b = xi[1]
    
    noise = sample_prior()
    w_noise = noise[0]
    b_noise = noise[1]
    
    w_proposal = []
    b_proposal = []
    for l in range(n_layer+1):
        w_proposal.append(np.zeros(model[2*l].weight.shape))
        b_proposal.append(np.zeros(model[2*l].bias.shape))
    
    if method=='pCNL':
        diff_w = diff[0]
        diff_b = diff[1]
        for l in range(n_layer+1):
            w_proposal[l] = ((2-delta)*w[l] + 2*delta*C[0][l]*diff_w[l] + np.sqrt(8*delta)*w_noise[l])/(2+delta)
            b_proposal[l] = ((2-delta)*b[l] + 2*delta*C[1][l]*diff_b[l] + np.sqrt(8*delta)*b_noise[l])/(2+delta)
#     elif method=='CNL':
#         diff_w = diff[0]
#         diff_b = diff[1]
#         for l in range(n_layer+1):
#             w_proposal[l] = ((2*C[0][l]-delta)*w[l] + 2*delta*C[0][l]*diff_w[l] + np.sqrt(8*delta*C[0][l])*w_noise[l])/(2*C[0][l]+delta)
#             b_proposal[l] = ((2*C[1][l]-delta)*b[l] + 2*delta*C[1][l]*diff_b[l] + np.sqrt(8*delta*C[1][l])*b_noise[l])/(2*C[1][l]+delta)
    else:
        for l in range(n_layer+1):
            w_proposal[l] = np.sqrt(1-beta*beta)*w[l]+beta*w_noise[l]
            b_proposal[l] = np.sqrt(1-beta*beta)*b[l]+beta*b_noise[l]
    return [w_proposal,b_proposal]

def MCMC(xi,N_data,data,max_time):   
    print('\nMCMC algorithm ('+method + ', N_data=' + str(N_data) + ', ' + str(max_time) + ' seconds) was started: ' + str(time.ctime()))
      
    acc_ratio = 0
    logposterior = []
    logp = []
    logl = []
    
    ''' Set model weights and biases to current iterate '''
    for l in range(n_layer+1):
        model[2*l].weight = torch.nn.Parameter(torch.from_numpy(xi[0][l]).float(), requires_grad=True)
        model[2*l].bias = torch.nn.Parameter(torch.from_numpy(xi[1][l]).float(), requires_grad=True)
        
    ''' Initialise likelihood and gradient '''    
    ll = loglikelihood(data)
    print('Initial loglikelihood: ',ll)
    if method=='CNL' or method=='pCNL':
        diff = diff_ll(data)
        func_plot(diff,'figs/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_diff')
        
    ''' Run MCMC '''
    start = time.time() 
    j = 0
    it = 0
    while(time.time()-start<max_time):
        
        ''' Swap functions around to get mode switching, not applicable for standard BNN '''
        
        ''' Propose and calculate acceptance probability '''
        if method=='pCNL' or method=='CNL':
            xi_proposal = propose(xi,diff)  
            a,ll_proposal,diff_proposal = acceptance_prop(xi,xi_proposal,data,ll,diff)
        else:
            xi_proposal = propose(xi)  
            a,ll_proposal = acceptance_prop(xi,xi_proposal,data,ll)
        
        ''' Accept or reject proposal '''
        uni = np.random.uniform()
        if uni < a or unadjusted:
            if method=='pCNL' or method=='CNL':
                diff = diff_proposal
            xi = xi_proposal    
            ll = ll_proposal
            acc_ratio = acc_ratio + 1

        ''' prior, likelihood, and posterior traceplots are appended '''
        lp = logprior(xi)
        logposterior.append(lp+ll)
        logp.append(lp)
        logl.append(ll)
        
        if prior_compare and j%10==0:
            ''' store value function evaluations for uncertainty estimates '''
            test_value_fn()
        elif policy_compare and (time.time()-start)>it*t_max/1000 and it<1000:
            ''' store sample for future use '''
            for l in range(n_layer+1):
                np.save('np_saved/MC/samples_policy_learning/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_w'+str(l)+'_sampleNo'+str(it)+'.npy',xi[0][l])
                np.save('np_saved/MC/samples_policy_learning/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_b'+str(l)+'_sampleNo'+str(it)+'.npy',xi[1][l])
            it += 1
        
        if (j+1)%100==0:
            progressBar(time.time()-start,j+1,max_time,acc_ratio)
        j+=1
        
    progressBar(max_time,j,max_time,acc_ratio)
    
    acc_ratio = acc_ratio/(j)
    print('\nMCMC algorithm terminated: ' + str(time.ctime()) + '. \nRuntime = ' + str(time.time()-start) + '\nSteps: ' + str(j))
    print('Final loglikelihood: ',ll)
    print('Acceptance ratio is ',acc_ratio)
    if dim_robust_test:
        global all_acceptances
        all_acceptances.append(acc_ratio)
    
    trajectory_plot(logposterior[1:],'figs/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_logposterior')
    trajectory_plot(logp[1:],'figs/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_logprior')
    trajectory_plot(logl[1:],'figs/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_loglikelihood')
    if prior_compare:
        boxplot_value_fn()
    for l in range(n_layer+1):
        np.save('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_w'+str(l)+'.npy',xi[0][l])
        np.save('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_b'+str(l)+'.npy',xi[1][l])
    func_plot(xi,'figs/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample')
    
    ESS(logposterior,'figs/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_autocorr')

# MAIN PROGRAMMES

In [47]:
"""

MAIN PROGRAMME 1 - compare NN prior to KL prior (large number of parameters)

"""    

prior_compare = True
policy_compare = False
dim_robust_test = False

# set maximal runtime
t_max = 3600*10

''' Initialise network network, see second block for detailed comments '''
input_size = dim
output_size = 1
n_layer = 3
hidden_sizes = [100,100,100]
hyps = str(hidden_sizes[0])
for i in range(1,n_layer):
    hyps = hyps+'_'+str(hidden_sizes[i])
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.Tanh(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.Tanh(),
                      nn.Linear(hidden_sizes[1], hidden_sizes[2]),
                      nn.Tanh(),
                      nn.Linear(hidden_sizes[-1], output_size))
weights = []
biases = []
for l in range(n_layer+1):
    weights.append(np.zeros(model[2*l].weight.shape))
    biases.append(np.zeros(model[2*l].bias.shape))
for l in range(n_layer+1):
    model[2*l].weight = torch.nn.Parameter(torch.from_numpy(0*weights[l]).float(), requires_grad=False)
    model[2*l].bias = torch.nn.Parameter(torch.from_numpy(0*biases[l]).float(), requires_grad=False)
''' Initialise covariance operator, see prior block for detailed comments '''
C = [[],[]]
C_root = [[],[]]
C_arr = np.ones(model[0].weight.shape)
for t in range(hidden_sizes[0]):
    for s in range(input_size):
        C_arr[t][s] = sigma_w_sq[0]/np.power(t+1,alpha)
C[0].append(C_arr)
C_root[0].append(C_arr**(1/2))
for l in range(1,n_layer):
    C_arr = np.ones(model[2*l].weight.shape)
    for t in range(hidden_sizes[l]):
        for s in range(hidden_sizes[l-1]):
            C_arr[t][s] = sigma_w_sq[l]/np.power(s+1,alpha)/np.power(t+1,alpha)
    C[0].append(C_arr)
    C_root[0].append(C_arr**(1/2))
C_arr = np.ones(model[2*n_layer].weight.shape)   
for t in range(output_size):
    for s in range(hidden_sizes[n_layer-1]):
        C_arr[t][s] = sigma_w_sq[n_layer]/np.power(s+1,alpha)
C[0].append(C_arr)   
C_root[0].append(C_arr**(1/2))
C_arr = np.ones(model[0].bias.shape)
for t in range(hidden_sizes[0]):
    C_arr[t] = sigma_b_sq[0]/np.power(t+1,alpha)
C[1].append(C_arr)
C_root[1].append(C_arr**(1/2))
for l in range(1,n_layer):
    C_arr = np.ones(model[2*l].bias.shape)
    for t in range(hidden_sizes[l]):
        C_arr[t] = sigma_b_sq[l]/np.power(t+1,alpha)
    C[1].append(C_arr)
    C_root[1].append(C_arr**(1/2))
C_arr = np.ones(model[2*n_layer].bias.shape)
for t in range(output_size):
    C_arr[t] = sigma_b_sq[n_layer]/np.power(t+1,alpha)
C[1].append(C_arr)
C_root[1].append(C_arr**(1/2))

# Sample from the prior to see what a sample looks like
xi = sample_prior()
func_plot(xi,'figs/MC/NN_'+hyps+'_a_prior_sample')

# Create environment 
env = gym.make('MountainCar-v0')
env = env.unwrapped
data = np.load('MC_data.npy')
N_data = 50

''' run pCN '''
method = 'pCN'
stochastic_gradients = False
unadjusted = False
# beta =  1/7 # for 10 on hidden layer (if all w = b = 1/3), and for 100 on hidden layer (if all w = 1/10, b = 1/30)
beta =  1/31  # for 100 on hidden layer (if all w = b = 1/3)
try:
    xi = [[],[]]
    for l in range(n_layer+1):
        xi[0].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_w'+str(l)+'.npy'))
        xi[1].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_b'+str(l)+'.npy'))
except FileNotFoundError:
    print('Starting from close to 0')
    xi = sample_prior(0.1)
MCMC(xi,N_data,data[0:N_data,:],t_max) 
                                          
env.close()   


MCMC algorithm (pCN, N_data=50, 36000 seconds) was started: Thu Jul  8 19:10:42 2021
Initial loglikelihood:  -17.84586236853932
Iteration: 565200    Acceptance ratio: 0.26    Percent: [--------------------------------------->] 100%  
MCMC algorithm terminated: Fri Jul  9 05:10:43 2021. 
Runtime = 36000.01379084587
Steps: 565200
Final loglikelihood:  -10.902652760540295
Acceptance ratio is  0.25993276716206654
Effective Sample Size: 109.0
Samples required to generate 1 independent sample: 5181.7


In [43]:
"""

MAIN PROGRAMME 2 - LEARN policy, and store samples for future use (small number of parameters)

"""    

prior_compare = False
policy_compare = True
dim_robust_test = False

# set maximal runtime
t_max = 3600*10

''' Initialise network network, see second block for detailed comments '''
input_size = dim
output_size = 1
n_layer = 3
hidden_sizes = [10,10,10]
hyps = str(hidden_sizes[0])
for i in range(1,n_layer):
    hyps = hyps+'_'+str(hidden_sizes[i])
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.Tanh(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.Tanh(),
                      nn.Linear(hidden_sizes[1], hidden_sizes[2]),
                      nn.Tanh(),
                      nn.Linear(hidden_sizes[-1], output_size))
weights = []
biases = []
for l in range(n_layer+1):
    weights.append(np.zeros(model[2*l].weight.shape))
    biases.append(np.zeros(model[2*l].bias.shape))
for l in range(n_layer+1):
    model[2*l].weight = torch.nn.Parameter(torch.from_numpy(0*weights[l]).float(), requires_grad=False)
    model[2*l].bias = torch.nn.Parameter(torch.from_numpy(0*biases[l]).float(), requires_grad=False)
''' Initialise covariance operator, see prior block for detailed comments '''
C = [[],[]]
C_root = [[],[]]
C_arr = np.ones(model[0].weight.shape)
for t in range(hidden_sizes[0]):
    for s in range(input_size):
        C_arr[t][s] = sigma_w_sq[0]/np.power(t+1,alpha)
C[0].append(C_arr)
C_root[0].append(C_arr**(1/2))
for l in range(1,n_layer):
    C_arr = np.ones(model[2*l].weight.shape)
    for t in range(hidden_sizes[l]):
        for s in range(hidden_sizes[l-1]):
            C_arr[t][s] = sigma_w_sq[l]/np.power(s+1,alpha)/np.power(t+1,alpha)
    C[0].append(C_arr)
    C_root[0].append(C_arr**(1/2))
C_arr = np.ones(model[2*n_layer].weight.shape)   
for t in range(output_size):
    for s in range(hidden_sizes[n_layer-1]):
        C_arr[t][s] = sigma_w_sq[n_layer]/np.power(s+1,alpha)
C[0].append(C_arr)   
C_root[0].append(C_arr**(1/2))
C_arr = np.ones(model[0].bias.shape)
for t in range(hidden_sizes[0]):
    C_arr[t] = sigma_b_sq[0]/np.power(t+1,alpha)
C[1].append(C_arr)
C_root[1].append(C_arr**(1/2))
for l in range(1,n_layer):
    C_arr = np.ones(model[2*l].bias.shape)
    for t in range(hidden_sizes[l]):
        C_arr[t] = sigma_b_sq[l]/np.power(t+1,alpha)
    C[1].append(C_arr)
    C_root[1].append(C_arr**(1/2))
C_arr = np.ones(model[2*n_layer].bias.shape)
for t in range(output_size):
    C_arr[t] = sigma_b_sq[n_layer]/np.power(t+1,alpha)
C[1].append(C_arr)
C_root[1].append(C_arr**(1/2))

# Sample from the prior to see what a sample looks like
xi = sample_prior()
func_plot(xi,'figs/MC/NN_'+hyps+'_a_prior_sample')

# Create environment 
env = gym.make('MountainCar-v0')
env = env.unwrapped
data = np.load('MC_data.npy')
N_data = 50 

''' run pCN '''
method = 'pCN'
stochastic_gradients = False
unadjusted = False
beta =  1/7
try:
    xi = [[],[]]
    for l in range(n_layer+1):
        xi[0].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_w'+str(l)+'.npy'))
        xi[1].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_b'+str(l)+'.npy'))
except FileNotFoundError:
    print('Starting from close to 0')
    xi = sample_prior(0.1)
MCMC(xi,N_data,data[0:N_data,:],t_max) 

''' run pCNL ''' # can be modified to CNL by just replacing the method to 'CNL'
method = 'pCNL'
stochastic_gradients = False # if true then unadjusted needs to be true too
unadjusted = False
delta = 1/400
try:
    xi = [[],[]]
    for l in range(n_layer+1):
        xi[0].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_w'+str(l)+'.npy'))
        xi[1].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_b'+str(l)+'.npy'))
except FileNotFoundError:
    print('\nStarting from close to 0')
    xi = sample_prior(0.1)
MCMC(xi,N_data,data[0:N_data,:],t_max) 
                                          
env.close()   


MCMC algorithm (pCNL, N_data=50, 36000 seconds) was started: Wed Jul  7 17:31:19 2021
Initial loglikelihood:  -29.19219649598323
Iteration: 106151    Acceptance ratio: 0.72    Percent: [--------------------------------------->] 100%  
MCMC algorithm terminated: Thu Jul  8 03:31:22 2021. 
Runtime = 36000.15491604805
Steps: 106151
Final loglikelihood:  -29.4802415896125
Acceptance ratio is  0.7195787133423143
Effective Sample Size: 312.0
Samples required to generate 1 independent sample: 340.23


In [9]:
"""

MAIN PROGRAMME 3 - check dimension-robustness of the proposed Neural Network prior

"""    

prior_compare = False
policy_compare = False
dim_robust_test = True

# set maximal runtime
t_max = 3600*5

all_acceptances = []

for hidden_size in [10,20,30,40,50,60,70,80,90,100]:
    ''' Initialise network network, see second block for detailed comments '''
    input_size = dim
    output_size = 1
    n_layer = 3
    hidden_sizes = [hidden_size,hidden_size,hidden_size]
    hyps = str(hidden_sizes[0])
    for i in range(1,n_layer):
        hyps = hyps+'_'+str(hidden_sizes[i])
    model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                          nn.Tanh(),
                          nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                          nn.Tanh(),
                          nn.Linear(hidden_sizes[1], hidden_sizes[2]),
                          nn.Tanh(),
                          nn.Linear(hidden_sizes[-1], output_size))
    weights = []
    biases = []
    for l in range(n_layer+1):
        weights.append(np.zeros(model[2*l].weight.shape))
        biases.append(np.zeros(model[2*l].bias.shape))
    for l in range(n_layer+1):
        model[2*l].weight = torch.nn.Parameter(torch.from_numpy(0*weights[l]).float(), requires_grad=False)
        model[2*l].bias = torch.nn.Parameter(torch.from_numpy(0*biases[l]).float(), requires_grad=False)
    ''' Initialise covariance operator, see prior block for detailed comments '''
    # Recall that alpha = 0 here
    C = [[],[]]
    C_root = [[],[]]
    C_arr = np.ones(model[0].weight.shape)
    for t in range(hidden_sizes[0]):
        for s in range(input_size):
            C_arr[t][s] = sigma_w_sq[0]/np.power(t+1,alpha)
    C[0].append(C_arr)
    C_root[0].append(C_arr**(1/2))
    for l in range(1,n_layer):
        C_arr = np.ones(model[2*l].weight.shape)
        for t in range(hidden_sizes[l]):
            for s in range(hidden_sizes[l-1]):
                C_arr[t][s] = sigma_w_sq[l]/np.power(s+1,alpha)/np.power(t+1,alpha)
        C[0].append(C_arr)
        C_root[0].append(C_arr**(1/2))
    C_arr = np.ones(model[2*n_layer].weight.shape)   
    for t in range(output_size):
        for s in range(hidden_sizes[n_layer-1]):
            C_arr[t][s] = sigma_w_sq[n_layer]/np.power(s+1,alpha)
    C[0].append(C_arr)   
    C_root[0].append(C_arr**(1/2))
    C_arr = np.ones(model[0].bias.shape)
    for t in range(hidden_sizes[0]):
        C_arr[t] = sigma_b_sq[0]/np.power(t+1,alpha)
    C[1].append(C_arr)
    C_root[1].append(C_arr**(1/2))
    for l in range(1,n_layer):
        C_arr = np.ones(model[2*l].bias.shape)
        for t in range(hidden_sizes[l]):
            C_arr[t] = sigma_b_sq[l]/np.power(t+1,alpha)
        C[1].append(C_arr)
        C_root[1].append(C_arr**(1/2))
    C_arr = np.ones(model[2*n_layer].bias.shape)
    for t in range(output_size):
        C_arr[t] = sigma_b_sq[n_layer]/np.power(t+1,alpha)
    C[1].append(C_arr)
    C_root[1].append(C_arr**(1/2))

    # Sample from the prior to see what a sample looks like
    xi = sample_prior()
    func_plot(xi,'figs/MC/NN_'+hyps+'_a_prior_sample')

    # Create environment 
    env = gym.make('MountainCar-v0')
    env = env.unwrapped

    data = np.load('MC_data.npy')

    for N_data in [50]: 

        ''' run pCN '''
        method = 'pCN'
        stochastic_gradients = False
        unadjusted = False
        beta =  1/7
        try:
            xi = [[],[]]
            for l in range(n_layer+1):
                xi[0].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_w'+str(l)+'.npy'))
                xi[1].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_b'+str(l)+'.npy'))
        except FileNotFoundError:
            print('\nStarting from close to 0')
            xi = sample_prior(0.1)
        MCMC(xi,N_data,data[0:N_data,:],t_max) 
        print('Number of layers: '+str(int(n_layer))+', number of nodes per layer: '+str(int(hidden_size)))

    env.close()   

np.save('MC_dim_independence_acceptances.npy',all_acceptances)
    
fig, ax = plt.subplots()
ax.set_title('Acceptance ratio by layer width')
ax.plot([10,20,30,40,50,60,70,80,90,100],all_acceptances)

fig.savefig('figs/NN_dim_independence.pdf', dpi=300)
plt.close(fig)


MCMC algorithm (pCN, N_data=50, 18000 seconds) was started: Fri Apr  8 22:47:27 2022
Initial loglikelihood:  -35.19923810573533


KeyboardInterrupt: 

In [11]:
all_acceptances = np.load('MC_dim_independence_acceptances.npy')
print('\nRespective acceptances and total number of parameters for the different layer widths:')
for N_l in [10,20,30,40,50,60,70,80,90,100]:
    number_w = N_l+2*(N_l**2)+2*N_l
    number_b = 1+3*N_l
    if N_l==100:
        print(str(N_l)+':  '+str(round(all_acceptances[int(N_l/10-1)]*100,2))+'%'+'     '+str(number_w+number_b))
    else:
        print(str(N_l)+':   '+str(round(all_acceptances[int(N_l/10-1)]*100,2))+'%'+'     '+str(number_w+number_b))


Respective acceptances and total number of parameters for the different layer widths:
10:   26.85%     261
20:   15.62%     921
30:   9.09%     1981
40:   5.09%     3441
50:   2.73%     5301
60:   1.51%     7561
70:   0.86%     10221
80:   0.5%     13281
90:   0.36%     16741
100:  0.18%     20601


In [16]:
"""

MAIN PROGRAMME 4 - check dimension-robustness of the proposed Neural Network prior, 1/N scaling

"""    

prior_compare = False
policy_compare = False
dim_robust_test = True

# set maximal runtime
t_max = 3600*5

all_acceptances = []

for hidden_size in [10,20,30,40,50,60,70,80,90,100]:
    ''' Initialise network network, see second block for detailed comments '''
    input_size = dim
    output_size = 1
    n_layer = 3
    hidden_sizes = [hidden_size,hidden_size,hidden_size]
    hyps = str(hidden_sizes[0])
    for i in range(1,n_layer):
        hyps = hyps+'_'+str(hidden_sizes[i])
    model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                          nn.Tanh(),
                          nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                          nn.Tanh(),
                          nn.Linear(hidden_sizes[1], hidden_sizes[2]),
                          nn.Tanh(),
                          nn.Linear(hidden_sizes[-1], output_size))
    weights = []
    biases = []
    for l in range(n_layer+1):
        weights.append(np.zeros(model[2*l].weight.shape))
        biases.append(np.zeros(model[2*l].bias.shape))
    for l in range(n_layer+1):
        model[2*l].weight = torch.nn.Parameter(torch.from_numpy(0*weights[l]).float(), requires_grad=False)
        model[2*l].bias = torch.nn.Parameter(torch.from_numpy(0*biases[l]).float(), requires_grad=False)
    ''' Initialise covariance operator, see prior block for detailed comments '''
    C = [[],[]]
    C_root = [[],[]]
    C_arr = np.ones(model[0].weight.shape)
    for t in range(hidden_sizes[0]):
        for s in range(input_size):
            C_arr[t][s] = sigma_w_sq[0]/np.sqrt(dim)
    C[0].append(C_arr)
    C_root[0].append(C_arr**(1/2))
    for l in range(1,n_layer):
        C_arr = np.ones(model[2*l].weight.shape)
        for t in range(hidden_sizes[l]):
            for s in range(hidden_sizes[l-1]):
                C_arr[t][s] = sigma_w_sq[l]*10/np.sqrt(hidden_size)
        C[0].append(C_arr)
        C_root[0].append(C_arr**(1/2))
    C_arr = np.ones(model[2*n_layer].weight.shape)   
    for t in range(output_size):
        for s in range(hidden_sizes[n_layer-1]):
            C_arr[t][s] = sigma_w_sq[n_layer]*10/np.sqrt(hidden_size)
    C[0].append(C_arr)   
    C_root[0].append(C_arr**(1/2))
    C_arr = np.ones(model[0].bias.shape)
    for t in range(hidden_sizes[0]):
        C_arr[t] = sigma_b_sq[0]
    C[1].append(C_arr)
    C_root[1].append(C_arr**(1/2))
    for l in range(1,n_layer):
        C_arr = np.ones(model[2*l].bias.shape)
        for t in range(hidden_sizes[l]):
            C_arr[t] = sigma_b_sq[l]
        C[1].append(C_arr)
        C_root[1].append(C_arr**(1/2))
    C_arr = np.ones(model[2*n_layer].bias.shape)
    for t in range(output_size):
        C_arr[t] = sigma_b_sq[n_layer]
    C[1].append(C_arr)
    C_root[1].append(C_arr**(1/2))

    # Sample from the prior to see what a sample looks like
    xi = sample_prior()
    func_plot(xi,'figs/MC/NN_'+hyps+'_a_prior_sample2')

    # Create environment 
    env = gym.make('MountainCar-v0')
    env = env.unwrapped

    data = np.load('MC_data.npy')

    for N_data in [50]: 

        ''' run pCN '''
        method = 'pCN'
        stochastic_gradients = False
        unadjusted = False
        beta =  1/12
        try:
            xi = [[],[]]
            for l in range(n_layer+1):
                xi[0].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_w'+str(l)+'.npy'))
                xi[1].append(np.load('np_saved/MC/NN_'+hyps+'_'+method+'_NData'+str(N_data)+'_lastSample_b'+str(l)+'.npy'))
        except FileNotFoundError:
            print('\nStarting from close to 0')
            xi = sample_prior(0.1)
        MCMC(xi,N_data,data[0:N_data,:],t_max) 
        print('Number of layers: '+str(int(n_layer))+', number of nodes per layer: '+str(int(hidden_size)))

    env.close()   

np.save('MC_dim_independence_acceptances2.npy',all_acceptances)
    
fig, ax = plt.subplots()
ax.set_title('Acceptance ratio by layer width')
ax.plot([10,20,30,40,50,60,70,80,90,100],all_acceptances)

fig.savefig('figs/NN_dim_independence2.pdf', dpi=300)
plt.close(fig)


MCMC algorithm (pCN, N_data=50, 18000 seconds) was started: Fri Apr  8 23:04:43 2022
Initial loglikelihood:  -35.19923810573533
Iteration: 279211    Acceptance ratio: 0.212    Percent: [--------------------------------------->] 100%  
MCMC algorithm terminated: Sat Apr  9 04:04:43 2022. 
Runtime = 18000.008841991425
Steps: 279211
Final loglikelihood:  -18.447714281226723
Acceptance ratio is  0.21237701953003285
Effective Sample Size: 306
Samples required to generate 1 independent sample: 913.45
Number of layers: 3, number of nodes per layer: 10

MCMC algorithm (pCN, N_data=50, 18000 seconds) was started: Sat Apr  9 04:05:08 2022
Initial loglikelihood:  -48.952184777819724
Iteration: 277155    Acceptance ratio: 0.15    Percent: [--------------------------------------->] 100%  
MCMC algorithm terminated: Sat Apr  9 09:05:08 2022. 
Runtime = 18000.009399175644
Steps: 277155
Final loglikelihood:  -22.85819067415917
Acceptance ratio is  0.15008930021107322
Effective Sample Size: 292
Sample

In [17]:
all_acceptances = np.load('MC_dim_independence_acceptances2.npy')
print('\nRespective acceptances and total number of parameters for the different layer widths:')
for N_l in [10,20,30,40,50,60,70,80,90,100]:
    number_w = N_l+2*(N_l**2)+2*N_l
    number_b = 1+3*N_l
    if N_l==100:
        print(str(N_l)+':  '+str(round(all_acceptances[int(N_l/10-1)]*100,2))+'%'+'     '+str(number_w+number_b))
    else:
        print(str(N_l)+':   '+str(round(all_acceptances[int(N_l/10-1)]*100,2))+'%'+'     '+str(number_w+number_b))


Respective acceptances and total number of parameters for the different layer widths:
10:   21.24%     261
20:   15.01%     921
30:   10.92%     1981
40:   8.52%     3441
50:   6.81%     5301
60:   5.47%     7561
70:   4.25%     10221
80:   3.91%     13281
90:   2.97%     16741
100:  2.23%     20601
