In [1]:
using Iterators;
using Plots
using Distributions;

In [2]:
# Select the PlotlyJS backend for the Plots calls made later in the notebook.
plotlyjs();

## Helper functions

In [3]:
# Batch-means estimate of the Monte Carlo variance of the sample mean of `x`.
#
# The first `nbatches*batchlen` samples are split into consecutive batches of
# `batchlen` samples (a trailing remainder shorter than one batch is ignored)
# and the estimate is `batchlen*var(batchmeans)/nbsamples`.
#
# Arguments:
#   x        - vector of samples (any AbstractVector of numbers)
#   batchlen - keyword; number of samples per batch (default 100)
#
# Fails with an AssertionError unless at least two full batches fit into `x`.
@everywhere function mcvar_bm(x::AbstractVector; batchlen::Int=100)
  nbatches = div(length(x), batchlen)
  @assert nbatches > 1 "Choose batch size such that the number of batches is greater than one"
  nbsamples = nbatches*batchlen
  # `view` averages each batch in place instead of copying the slice first.
  batchmeans = Float64[mean(view(x, ((j-1)*batchlen+1):(j*batchlen))) for j = 1:nbatches]
  return batchlen*var(batchmeans)/nbsamples
end

# Monte Carlo variance of the sample mean of `x` when the samples are treated
# as independent: the sample variance divided by the number of samples.
@everywhere function mcvar_iid(x)
  nsamples = length(x)
  return var(x)/nsamples
end

# Effective sample size of the series `x`: the sample count scaled by the ratio
# of the independent-sample variance estimate (`mcvar_iid`) to the batch-means
# variance estimate (`mcvar_bm`).
@everywhere function ess(x)
  iid_var = mcvar_iid(x)
  bm_var = mcvar_bm(x)
  # Multiply before dividing so the rounding matches length*iid/bm exactly.
  return (length(x)*iid_var)/bm_var
end

## Model definition

We consider a simple $d$-dimensional mass-spring model consisting of a regular mesh of $(M+2)^d$ points in $\mathbb{R}^d$, with the displacement at the boundary points (those with a coordinate index equal to $0$ or $M+1$) held fixed.  The vector $(u_{\alpha})_{\alpha} \in \mathbb{R}^N$, with $N = M^d$, defines the out-of-plane displacement of the remaining points.  We assume a simple quadratic potential energy of the form $E_N^{(r)}(u) = M^2 E_N(u)$, where $E_N(u)$ is given as before:
$$
    E_N(u) = \frac{1}{2}\sum_{\alpha \sim \beta} |u_\alpha  - u_{\beta}|^2 = \frac{1}{2} u\cdot H u.
$$
To this end, we define the rescaled Hessian $H^{(r)}= M^2H$ which can be computed from the following function.

In [7]:
# Flatten the lattice coordinates `p[1], ..., p[d]` into a single zero-based
# offset, treating them as the digits of a base-`M` number with `p[1]` as the
# least significant digit.
@everywhere function get_laplacian_index(p, M::Int64, d::Int64)
    offset = 0
    stride = 1          # equals M^(i-1) at the start of iteration i
    for i in 1:d
        offset += p[i]*stride
        stride *= M
    end
    return offset
end

# Assemble, entry by entry, the sparse M^d x M^d matrix with 2*d*M^2 on the
# diagonal and -M^2 for every pair of lattice sites that differ by one step
# along a single coordinate axis.
#
# Arguments:
#   M - number of lattice sites per dimension (coordinates run over 0:M-1)
#   d - lattice dimension
#
# Sites are numbered with `get_laplacian_index` (shifted by one for 1-based
# matrix indices). A neighbour that would fall outside 0:M-1 contributes no
# off-diagonal entry, but the diagonal always counts all 2*d neighbours.
@everywhere function brutal_laplacian(M::Int64, d::Int64)
    rows = Int64[]
    cols = Int64[]
    vals = Float64[]
    for p in product([collect(0:M-1) for k=1:d]...)
        index = get_laplacian_index(p, M, d)

        # One diagonal entry per site: M^2 for each of the 2*d directions,
        # whether or not the neighbour in that direction is inside the lattice.
        push!(rows, index+1)
        push!(cols, index+1)
        push!(vals, 2*d*M^2)

        for j in 1:d
            for step in (+1, -1)
                shifted = p[j] + step
                # Only coordinate j changes, so it is the only one to range-check.
                if 0 <= shifted <= M-1
                    pnew = [i == j ? shifted : p[i] for i in 1:d]
                    new_index = get_laplacian_index(pnew, M, d)

                    push!(rows, index+1)
                    push!(cols, new_index+1)
                    push!(vals, -M^2)
                end
            end
        end
    end
    # State the size explicitly instead of inferring it from the largest index.
    return sparse(rows, cols, vals, M^d, M^d)
end

In [8]:
# Build two closures over the matrix `H`: the quadratic form
# x -> 0.5*dot(x, H*x) and the linear map x -> H*x (its gradient when `H` is
# symmetric). They are returned as the tuple (potential, gradient).
@everywhere function generatePotentialFunctions(H)
    energy = x -> 0.5*dot(x, H*x)
    gradient = x -> H*x
    return energy, gradient
end

## Spectrum of $H^{(r)}$

The spectrum of $H^{(r)}$ satisfies the scaling $\lambda_j \sim j^{2/d}$, for $j\in \mathbb{N}$.  In particular $\sigma(H^{(r)})\subset [1, \infty)$. 

In [9]:
# Compare the computed eigenvalues of the matrix for a 4^3 lattice with the
# j^(2/dim) growth law on log-log axes.
M = 4
dim = 3
H = brutal_laplacian(M, dim)
evals = first(eigs(H; nev=M^dim - 1, which=:SM))

ind = collect(1:(M^dim - 1))
evals_theory = ind.^(2/dim)

plot(ind, hcat(evals, evals_theory); style=:auto, label=["Eigs" "Theory"])
yaxis!("eig_i", :log10)
xaxis!("i", :log10)


In [13]:
# Plot the reciprocals of the computed eigenvalues for a 10^3 lattice
# (labelled "Covar Eigs") on log-log axes.
M = 10
dim = 3
L = brutal_laplacian(M, dim)
evals = first(eigs(L; nev=M^dim - 1, which=:SM))

ind = collect(1:(M^dim - 1))

plot(ind, 1.0 ./ evals; style=:auto, label=["Covar Eigs"])
yaxis!("eig_i", :log10)
xaxis!("i", :log10)


## MALA Scheme

Our objective is to compute $I = \int f(x)\mu(dx)$, where $\mu(dx) \propto \exp(-E^{(r)}_N(x))\,dx$ and $f(x) = \frac{1}{N}E^{(r)}_N(x) = \frac{1}{2N}\, x\cdot H^{(r)} x$.  Since $x\cdot H^{(r)} x$ has mean $N$ under $\mu$, it is clear that $I = \frac{1}{2N}\frac{\int x\cdot H^{(r)} x\, e^{-\frac{x \cdot H^{(r)} x}{2}}\,dx}{\int e^{-\frac{x \cdot H^{(r)} x}{2}}\,dx} = \frac{1}{2}$.  Moreover, the stationary variance is $\frac{1}{4N^2} \frac{\int (x\cdot H^{(r)} x) (x\cdot H^{(r)} x) e^{-\frac{x \cdot H^{(r)} x}{2}}\,dx}{\int e^{-\frac{x \cdot H^{(r)} x}{2}}\,dx}  - \frac{1}{4} =  \frac{1}{2N},$ and so is bounded above uniformly in $N$.

As before, we use a preconditioned Metropolis-Adjusted Langevin scheme (P-MALA), where $P$ is an appropriate preconditioning matrix.  

In [14]:
# Run a (optionally preconditioned) Metropolis-adjusted Langevin chain for the
# quadratic potential V(x) = 0.5*dot(x, P*x) built from the matrix `P`.
#
# Arguments:
#   x0          - initial state; stored as the first column of the trace
#   P           - sparse matrix defining the potential; it is also the
#                 preconditioner when `prec` is true
#   numsteps    - number of columns of the returned trace, initial state included
#   latticeSize - lattice size; the state dimension is latticeSize^dim
#   dim         - lattice dimension
#   delta       - step size
#   prec        - keyword; when false the identity replaces `P` as preconditioner
#
# Returns `(state, acc_rate)`: `state` is an effDim x numsteps matrix whose
# columns are the successive chain states, and `acc_rate` is the fraction of
# the `numsteps - 1` proposals that were accepted (0.0 when none were made).
@everywhere function timestepPMALA(x0::Array{Float64, 1}, P::SparseMatrixCSC{Float64,Int64},
                                   numsteps::Int64, latticeSize::Int64,
                                   dim::Int64, delta::Float64; prec=true)
    num_acc = 0
    effDim = latticeSize^dim

    state = zeros(effDim, numsteps)
    state[:, 1] = x0

    # The potential is built from the matrix passed in, before `P` is possibly
    # swapped for the identity preconditioner below.
    V, gradV = generatePotentialFunctions(P)

    if !prec
        P = speye(effDim)
    end
    Pfact = cholfact(full(P))

    # Potential and preconditioned drift at the current state; both are
    # refreshed only when a proposal is accepted.
    Vx = V(x0)
    driftx = Pfact\gradV(x0)

    @inbounds @fastmath for n=1:numsteps-1
        x = state[:, n]

        # With P = U'U, solving against U turns white noise into noise with
        # covariance inv(P).
        noise = Pfact[:U]\randn(effDim)
        y = x - driftx*delta + sqrt(2*delta)*noise

        # Quadratic exponent of the proposal density for the move x -> y.
        r = y - (x - delta*driftx)
        lyx = dot(r, P*r)/(4*delta)

        drifty = Pfact\gradV(y)
        Vy = V(y)
        # Quadratic exponent of the proposal density for the reverse move y -> x.
        r = x - (y - delta*drifty)
        lxy = dot(r, P*r)/(4*delta)

        # Metropolis-Hastings test, using the cached potentials instead of
        # evaluating V(x) and V(y) a second time.
        if -log(rand()) > Vy - Vx + lxy - lyx
            x = y
            num_acc += 1
            Vx = Vy
            driftx = drifty
        end

        state[:,n+1] = x
    end

    # Only numsteps-1 proposals are made, so that is the right denominator.
    nproposals = numsteps - 1
    acc_rate = nproposals > 0 ? num_acc/nproposals : 0.0
    return state, acc_rate
end

Since we consider a quadratic potential we can sample from the invariant distribution $\mu(dx)$ directly, using the following function:

In [15]:
# Draw one random vector from the multivariate normal `MvNormal(Hinv)`.
# NOTE(review): presumably the zero-mean Gaussian with covariance `Hinv`, as
# the single-matrix constructor does in Distributions.jl; confirm for the
# installed version.
@everywhere function sampleStatDist(Hinv::Array{Float64,2})
    stationary = MvNormal(Hinv)
    return rand(stationary)
end

The following function generates a timeseries using P-MALA and applies the function $f(x) = \frac{1}{2N}x\cdot H^{(r)} x$, starting from stationarity.

In [16]:
# Simulate one chain with `timestepPMALA`, started from a draw of
# `sampleStatDist(Hinv)`, and return the potential built from `H` evaluated
# at every state of the chain and divided by M^dim, as a vector with one
# entry per chain state.
@everywhere function estfun(M::Int64, dim::Int64, H::AbstractSparseMatrix, Hinv, numsteps::Int64,  delta::Float64, prec::Bool)
    start = sampleStatDist(Hinv)
    trace, _ = timestepPMALA(start, H, numsteps, M, dim, delta; prec=prec)

    potential, _ = generatePotentialFunctions(H)
    nsites = M^dim

    # One observable value per column (chain state) of the trace.
    return Float64[potential(trace[:, n])/nsites for n in 1:size(trace, 2)]
end


The following function implements a basic grid search approach to tuning step-size based on Effective Sample Size.  This is not a very robust approach.

In [17]:
# Grid search for the step size that maximises the effective sample size.
#
# For every lattice size in `Ms`, one chain of `numsteps` states is simulated
# per candidate in `deltas`, its ESS is computed with `ess`, and the candidate
# with the largest ESS is kept.
#
# Arguments:
#   dim      - lattice dimension
#   numsteps - chain length used for each trial run
#   prec     - whether the sampler is preconditioned
#   deltas   - candidate step sizes (indexable collection)
#   Ms       - lattice sizes to tune for
#
# Returns a vector with the selected step size for each entry of `Ms`.
function tune_stepsize(dim::Int64, numsteps::Int64, prec::Bool, deltas, Ms)
    delta_opt = zeros(length(Ms))

    for (i, M) in enumerate(Ms)
        H = brutal_laplacian(M, dim)
        Hinv = inv(full(H))
        ess_vs_delta = Float64[ess(estfun(M, dim, H, Hinv, numsteps, delta, prec)) for delta in deltas]

        # A chain that never moves gives a 0/0 = NaN ESS and a zero batch-means
        # variance gives Inf; neither is a meaningful score, so both count as 0
        # and cannot win the search.
        for k in eachindex(ess_vs_delta)
            if !isfinite(ess_vs_delta[k])
                ess_vs_delta[k] = 0.0
            end
        end

        ess_max, index = findmax(ess_vs_delta)
        delta_max = deltas[index]
        delta_opt[i] = delta_max
        println("[Tuning] M = $M : $ess_max, $index, $delta_max")
    end

    return delta_opt
end

tune_stepsize (generic function with 1 method)

Given a sequence of lattice sizes $M$ and the corresponding tuned step sizes, the following function computes the variance and the MSE of the estimator over $N_{\text{runs}}$ independent chains of `Numsteps` steps each, every chain being started from the invariant measure.

In [18]:
# Variance and mean-squared error (about the value 0.5) of the chain-average
# estimator, for each lattice size.
#
# For every lattice size in `Ms`, `Nruns` independent chains of `Numsteps`
# states are simulated with the matching step size from `delta_opts`; the mean
# of `estfun` over each chain is one estimate.
#
# Arguments:
#   Ms         - lattice sizes
#   delta_opts - step size to use for each entry of `Ms`
#   Nruns      - number of independent chains per lattice size
#   Numsteps   - length of each chain
#   prec       - whether the sampler is preconditioned
#   latticedim - keyword; lattice dimension. Defaults to the global `dim` at
#                call time, which is what this function has always read.
#
# Returns `(vars, mses)`, each with one entry per lattice size.
function compute_trace_stats(Ms, delta_opts, Nruns, Numsteps, prec; latticedim=dim)
    vars = zeros(length(Ms))
    mses = zeros(length(Ms))

    for (i, M) in enumerate(Ms)
        delta = delta_opts[i]
        H = brutal_laplacian(M, latticedim)
        Hinv = inv(full(H))
        # Use the `Numsteps` argument, not a global of a similar name.
        runs = Float64[mean(estfun(M, latticedim, H, Hinv, Numsteps, delta, prec)) for j in 1:Nruns]
        vars[i] = var(runs)
        mses[i] = mean((runs .- 0.5).^2)
        println("[Computing Stats] M = $M,  : Var = ", vars[i], " MSE = ", mses[i])
    end

    return vars, mses
end

compute_trace_stats (generic function with 1 method)

# Variance and MSE computations

In this section we compare the variance and MSE for the preconditioned MALA with $P = H^{(r)}$ to the standard MALA approach, where $P = I$.  We compare in terms of dimension $d$ and lattice size $M$.

In [19]:
# Candidate step sizes scanned by tune_stepsize: deltaP is passed for the
# preconditioned sampler and deltaU for the unpreconditioned one.
deltaP = 0.01:0.1:0.6
deltaU = 0.01:0.01:0.1

# Chain length, number of independent chains per setting, and lattice sizes
# shared by the experiments below.
numsteps=10000
Nruns = 100
Ms = [2, 4, 6, 8, 10];

## One dimension

In [20]:
# Lattice dimension for this experiment; tune_stepsize receives it as an
# argument, while compute_trace_stats reads the global `dim` inside its body.
dim=1

# Preconditioned sampler (prec = true): tune over deltaP, then collect statistics.
delta_opts = tune_stepsize(dim, numsteps, true, deltaP, Ms);
vars1_pmala, mses1_pmala = compute_trace_stats(Ms, delta_opts, Nruns, numsteps, true);

# Unpreconditioned sampler (prec = false): tune over deltaU, then collect statistics.
delta_opts = tune_stepsize(dim, numsteps, false, deltaU, Ms);
vars1_mala, mses1_mala = compute_trace_stats(Ms, delta_opts, Nruns, numsteps, false);


[Tuning] M = 2 : 5141.393257213153, 6, 0.51
[Tuning] M = 4 : 5225.380296323799, 6, 0.51
[Tuning] M = 6 : 4777.659023224319, 6, 0.51
[Tuning] M = 8 : 3550.995254258974, 6, 0.51
[Tuning] M = 10 : 3326.2117348470715, 6, 0.51
[Computing Stats] M = 2,  : Var = 5.379669016972292e-5 MSE = 5.333117904780232e-5
[Computing Stats] M = 4,  : Var = 3.064369549900881e-5 MSE = 3.04415355048107e-5
[Computing Stats] M = 6,  : Var = 2.2629171924666597e-5 MSE = 2.2443482536140417e-5
[Computing Stats] M = 8,  : Var = 1.8617481246025796e-5 MSE = 1.8604105625006175e-5
[Computing Stats] M = 10,  : Var = 2.0201558438320673e-5 MSE = 2.0074729268049003e-5
[Tuning] M = 2 : 4400.052093671557, 9, 0.09
[Tuning] M = 4 : 2392.717786736443, 2, 0.02
[Tuning] M = 6 : 9900.9900990099, 8, 0.08
[Tuning] M = 8 : 52376.23762376237, 9, 0.09
[Tuning] M = 10 : 1100.1100110011, 5, 0.05
[Computing Stats] M = 2,  : Var = 7.369380341156586e-5 MSE = 7.350208278892687e-5
[Computing Stats] M = 4,  : Var = 5.7150492127324826e-5 MSE = 5

Plotting the results here

In [21]:
# One-dimensional results: variance against lattice size for both samplers,
# on log-log axes.
plot(Ms, hcat(vars1_pmala, vars1_mala); style=:auto, label=["P-MALA" "MALA"])
yaxis!("Var", :log10)
xaxis!("M", :log10)
title!("Variance for N=$numsteps timesteps for d =$dim")

## Two dimensions

In [22]:
# Lattice dimension for this experiment; tune_stepsize receives it as an
# argument, while compute_trace_stats reads the global `dim` inside its body.
dim=2

# Preconditioned sampler (prec = true): tune over deltaP, then collect statistics.
delta_opts = tune_stepsize(dim, numsteps, true, deltaP, Ms);
vars2_pmala, mses2_pmala = compute_trace_stats(Ms, delta_opts, Nruns, numsteps, true);

# Unpreconditioned sampler (prec = false): tune over deltaU, then collect statistics.
delta_opts = tune_stepsize(dim, numsteps, false, deltaU, Ms);
vars2_mala, mses2_mala = compute_trace_stats(Ms, delta_opts, Nruns, numsteps, false);


[Tuning] M = 2 : 4288.678532645808, 5, 0.41000000000000003
[Tuning] M = 4 : 3301.7003806514854, 6, 0.51
[Tuning] M = 6 : 1883.472945859169, 4, 0.31000000000000005
[Tuning] M = 8 : 1580.2301329788877, 4, 0.31000000000000005
[Tuning] M = 10 : 1505.6868931683264, 3, 0.21000000000000002
[Computing Stats] M = 2,  : Var = 3.351846218416565e-5 MSE = 3.3318175562486276e-5
[Computing Stats] M = 4,  : Var = 1.3297015246197405e-5 MSE = 1.3366371971234945e-5
[Computing Stats] M = 6,  : Var = 7.545660569921836e-6 MSE = 7.489907118823008e-6
[Computing Stats] M = 8,  : Var = 5.535855664721876e-6 MSE = 5.5822010111571035e-6
[Computing Stats] M = 10,  : Var = 3.7379089914745963e-6 MSE = 3.7185623474914334e-6
[Tuning] M = 2 : 4729.3318751708985, 3, 0.03
[Tuning] M = 4 : 4851.485148514851, 7, 0.07
[Tuning] M = 6 : 83267.32673267325, 2, 0.02
[Tuning] M = 8 : 4851.485148514851, 7, 0.07
[Tuning] M = 10 : 3564.356435643564, 5, 0.05
[Computing Stats] M = 2,  : Var = 2.9388938943182262e-5 MSE = 2.9370287518570

In [23]:
# Two-dimensional results: variance against lattice size for both samplers,
# on log-log axes.
plot(Ms, hcat(vars2_pmala, vars2_mala); style=:auto, label=["P-MALA" "MALA"])
yaxis!("Var", :log10)
xaxis!("M", :log10)
title!("Variance for N=$numsteps timesteps for d =$dim")

## Three dimensions

In [24]:
# Lattice dimension for this experiment; tune_stepsize receives it as an
# argument, while compute_trace_stats reads the global `dim` inside its body.
dim=3

# Preconditioned sampler (prec = true): tune over deltaP, then collect statistics.
delta_opts = tune_stepsize(dim, numsteps, true, deltaP, Ms);
vars3_pmala, mses3_pmala = compute_trace_stats(Ms, delta_opts, Nruns, numsteps, true);

# Unpreconditioned sampler (prec = false): tune over deltaU, then collect statistics.
delta_opts = tune_stepsize(dim, numsteps, false, deltaU, Ms);
vars3_mala, mses3_mala = compute_trace_stats(Ms, delta_opts, Nruns, numsteps, false);

[Tuning] M = 2 : 4487.320002284564, 5, 0.41000000000000003
[Tuning] M = 4 : 1530.9910464097984, 3, 0.21000000000000002
[Tuning] M = 6 : 903.8635856893785, 3, 0.21000000000000002
[Tuning] M = 8 : 742.0312975890897, 2, 0.11
[Tuning] M = 10 : 1584.158415841584, 6, 0.51
[Computing Stats] M = 2,  : Var = 1.7778870401789836e-5 MSE = 1.7612920641639008e-5
[Computing Stats] M = 4,  : Var = 5.339373766705735e-6 MSE = 5.323475277262587e-6
[Computing Stats] M = 6,  : Var = 3.0244651811333863e-6 MSE = 3.0187670414663204e-6
[Computing Stats] M = 8,  : Var = 1.4082968294772065e-6 MSE = 1.3944560991372255e-6
[Computing Stats] M = 10,  : Var = 0.0006567382760588091 MSE = 0.0006555311287599108
[Tuning] M = 2 : 3124.2657295387335, 2, 0.02
[Tuning] M = 4 : 2475.247524752475, 3, 0.03
[Tuning] M = 6 : 396.039603960396, 2, 0.02
[Tuning] M = 8 : 3179.3179317931795, 2, 0.02
[Tuning] M = 10 : 22277.227722772273, 2, 0.02
[Computing Stats] M = 2,  : Var = 2.1250815310149185e-5 MSE = 2.1038996946003254e-5
[Comput

In [26]:
# Three-dimensional results: variance against lattice size for both samplers,
# on log-log axes.
plot(Ms, hcat(vars3_pmala, vars3_mala); style=:auto, label=["P-MALA" "MALA"])
yaxis!("Var", :log10)
xaxis!("M", :log10)
title!("Variance for N=$numsteps timesteps for d =$dim")

I would not read too much into the last data point; it is most likely the result of a poor choice of tuning parameter.