## Imports

In [3]:
# Note this script needs a conda environment with sbibm installed, e.g.:

# using Conda
# using Pkg

# ENV["PYTHON"] = ""
# Pkg.build("PyCall")

# Conda.pip_interop(true)
# Conda.pip("install", "sbibm")

# Make sure up to date
# Pkg.rm("SyntheticLikelihood")
# Pkg.add(url="https://github.com/danielward27/SyntheticLikelihood.jl")
using SyntheticLikelihood
using PyCall
using Distributions
using DelimitedFiles
using Random
using Parameters
using LinearAlgebra
using DataFrames
using CSV

sbibm = pyimport("sbibm")
torch = pyimport("torch");

## Convert stuff from python to julia

In [4]:
task_priors = include("task_priors.jl")
String.(keys(task_priors))

("gaussian_linear", "gaussian_linear_uniform", "gaussian_mixture", "bernoulli_glm")

### Rough test that prior conversion looks right

In [5]:
for task_name in String.(keys(task_priors))
    n = 2000
    jl_prior = task_priors[Symbol(task_name)]
    jl_samples = sample_θ(jl_prior, n)
    jl_mean = mean.(eachcol(jl_samples))
    jl_cov = cov(jl_samples)

    py_prior = sbibm.get_task(task_name).get_prior()
    py_samples = py_prior(n).numpy();
    py_mean = mean.(eachcol(py_samples))
    py_cov = cov(py_samples)
    
    println(task_name)
    println("Julia means = $(round.(jl_mean; digits = 2))")
    println("Python means = $(round.(py_mean; digits = 2)) \n")
    
    @assert size(py_mean) == size(jl_mean)
    @assert isapprox(py_mean, jl_mean; rtol = 2)
    @assert isapprox(py_cov, jl_cov; rtol = 0.7)
end

gaussian_linear
Julia means = [0.01, -0.0, 0.01, -0.01, -0.0, 0.02, -0.0, -0.01, 0.0, 0.0]
Python means = Float32[0.0, 0.0, 0.0, 0.0, 0.01, -0.01, -0.01, -0.01, 0.0, -0.0] 

gaussian_linear_uniform
Julia means = [0.0, -0.0, -0.0, -0.01, 0.02, 0.01, -0.03, 0.01, -0.02, -0.02]
Python means = Float32[-0.01, -0.0, -0.02, -0.01, -0.01, -0.01, -0.0, 0.02, 0.01, -0.0] 

gaussian_mixture
Julia means = [0.12, 0.08]
Python means = Float32[0.11, 0.17] 

bernoulli_glm
Julia means = [-0.0, -0.0, -0.02, -0.03, -0.04, -0.05, -0.03, -0.01, 0.01, 0.03]
Python means = Float32[0.04, -0.03, -0.03, -0.01, 0.0, -0.03, -0.05, -0.02, 0.0, 0.03] 



## Check the variance ratios between prior and posterior
To get a good idea for the defualt proposal we can compare the variance of the posterior to the prior on the tasks.

In [6]:
mean_ratio = begin
    ratios = []
    for task_name in String.(keys(task_priors))
        n = 2000
        jl_prior = task_priors[Symbol(task_name)]
        prior_var = diag(cov(jl_prior))
        py_task = sbibm.get_task(task_name)
        posterior_samples = py_task.get_reference_posterior_samples(1).numpy()
        posterior_var = diag(cov(posterior_samples))
        ratio = mean(posterior_var ./ prior_var)
        println(task_name, ": ", ratio)
        push!(ratios, ratio)
    end
    mean(ratios)
end

println("The mean variance ratio is $(mean_ratio)")

gaussian_linear: 0.4998179227113724
gaussian_linear_uniform: 0.2024229694157839
gaussian_mixture: 0.010489855706691741
bernoulli_glm: 0.0981549893539872
The mean variance ratio is 0.20272143429695877


In [7]:
function get_jl_simulator(task)   
    py_simulator = task.get_simulator()
    simulator(θ::Vector{Float64}) = begin
        θ = torch.tensor(θ, dtype = torch.float32)
        x = py_simulator(θ)
        convert(Vector{Float64}, vec(x.numpy()))
    end
    simulator
end;

In [8]:
struct JuliaTask
    name
    simulator
    prior
    s_true
    obs_seed
end

function JuliaTask(python_task, obs_seed::Integer)
    name = python_task.name
    simulator = get_jl_simulator(python_task)
    prior = task_priors[Symbol(name)]
    s_true = vec(python_task.get_observation(obs_seed).numpy())
    s_true = convert(Vector{Float64}, s_true)
    JuliaTask(name, simulator, prior, s_true, obs_seed)
end;

## Get starting parameters
To get the starting parameters I will sample 1000 sets of parameters and take the mean of all the parameters.

In [9]:
function get_starting_params(prior::Prior)
    mean.(eachcol(sample_θ(prior, 1000)))
end

get_starting_params (generic function with 1 method)

## Loop through tasks and run the Riemannian ULA algorithm

In [10]:
const n_steps = 4000
const n_sim = 1000  # at each mcmc iteration
const n_burn = 1000;

In [25]:
algorithm = "rula"
tasks = []
run_times = []
errors = []

for (i, task_name) in enumerate(String.(keys(task_priors)))
    @info "Task = $(task_name)"

    Random.seed!(i)
    pytask = sbibm.get_task(task_name)
    jltask = JuliaTask(pytask, 1)
    @unpack simulator, prior, s_true, obs_seed = jltask
    
    init_θ = get_starting_params(prior)

    local_posterior = LocalPosterior(;
      simulator, s_true, n_sim, prior,
    )
    
    rula = RiemannianULA(0.5)
    
    time = @elapsed data = try
        run_sampler!(rula, local_posterior; init_θ, n_steps)
    catch e
        @warn "$(task_name) failed!"
        push!(errors, e)
        continue
    end
    
    open("./samples/$(task_name)_$(algorithm).txt", "w") do io
        writedlm(io, data.θ[(n_burn+1):end, :])
    end
                             
    push!(tasks, task_name)
    push!(run_times, time)

end

df = DataFrame(task = tasks, run_time = run_times)
CSV.write("./results/$(algorithm).csv", df)

┌ Info: Task = gaussian_linear
└ @ Main In[25]:7
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:27:27[39m
┌ Info: Task = gaussian_linear_uniform
└ @ Main In[25]:7
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:26:19[39m
┌ Info: Task = gaussian_mixture
└ @ Main In[25]:7
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:12:14[39m
┌ Info: Task = sir
└ @ Main In[25]:7
│ covariance.
└ @ SyntheticLikelihood /home/dw16200/.julia/packages/SyntheticLikelihood/vVDAq/src/glm_local_regression.jl:69
└ @ Main In[25]:25
┌ Info: Task = bernoulli_glm
└ @ Main In[25]:7
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:27:19[39m


"./results/rula_step_size_05.csv"

## Loop through tasks and run basic Bayesian Synthetic Likelihood
Below we use standard synthetic likelihood. We use a burn in of 1000 steps (either accepted or rejected), and then use the empirical covariance matrix of the last 75% of samples of the burn in to form the proposal distribution for the next steps.

In [32]:
algorithm = "rwm"
tasks = []
run_times = []
accecptance_rates = []
errors = []

for (i, task_name) in enumerate(String.(keys(task_priors)))
    @info "Task = $(task_name)"

    Random.seed!(i)
    pytask = sbibm.get_task(task_name)
    jltask = JuliaTask(pytask, 1)
    
    @unpack simulator, prior, s_true, obs_seed = jltask
    
    init_θ = get_starting_params(prior)
    
    # Burn in 1000 "steps" and 0.2*covariance of the prior
    rwm = RWMetropolis(MvNormal(0.2*cov(prior)))
    basic_posterior = BasicPosterior(;simulator, s_true, n_sim, prior)
    
    time1 = @elapsed data = try
         run_sampler!(rwm, basic_posterior; init_θ, n_steps = n_burn, collect_data = [:θ, :accepted])
    catch e
        @warn "$(task_name) failed!"
        push!(errors, e)
    end

    burn_in_θ = data.θ
    quarter = round(Int64, size(burn_in_θ, 1)*0.25)  
    new_Σ = (2.38^2)*cov(burn_in_θ[quarter:end, :]) ./ size(burn_in_θ, 2)
    init_θ = burn_in_θ[end, :]
        
    # Actual run
    rwm = RWMetropolis(MvNormal(new_Σ))
    basic_posterior = BasicPosterior(;simulator, s_true, n_sim, prior)
    time2 = @elapsed data = run_sampler!(rwm, basic_posterior; init_θ, n_steps = n_steps - n_burn, collect_data = [:θ, :accepted])
     
    open("./samples/$(task_name)_$(algorithm).txt", "w") do io
            writedlm(io, data.θ)
    end
    
    push!(tasks, task_name)
    push!(run_times, time1 + time2)
    push!(accecptance_rates, sum(data.accepted)/length(data.accepted))
end

df = DataFrame(task = tasks, run_time = run_times, acceptance_rate = accecptance_rates)
CSV.write("./results/$(algorithm).csv", df)

┌ Info: Task = gaussian_linear
└ @ Main In[32]:8
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:05:03[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:15:48[39m
┌ Info: Task = gaussian_linear_uniform
└ @ Main In[32]:8
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:08:03[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:17:52[39m
┌ Info: Task = gaussian_mixture
└ @ Main In[32]:8
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:02:45[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:08:06[39m
┌ Info: Task = bernoulli_glm
└ @ Main In[32]:8
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:04:51[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:14:26[39m


"./results/rwm.csv"

In [11]:
#using DelimitedFiles
#using GLM
#X = readdlm("X.txt")
#y = readdlm("y.txt")
#y = reshape(y, length(y))
#glm(X, y, Gamma(), LogLink(), maxiter=1000)

In [42]:
mean(LogNormal(1,10))

1.4093490824269389e22

### Run times on simple gaussian example
Check how the run time scales with the number of parameters, and the number of summary statistics

In [11]:
algorithm = "rula"
tasks = []
run_times = []
errors = []

Any[]

In [12]:
i = 1
task_name = String.(keys(task_priors))[1]
Random.seed!(i)
pytask = sbibm.get_task(task_name)
jltask = JuliaTask(pytask, 1)
@unpack simulator, prior, s_true, obs_seed = jltask

init_θ = get_starting_params(prior)

local_posterior = LocalPosterior(;
      simulator, s_true, n_sim, prior,
)

rula = RiemannianULA(0.5)
    
   
@profile run_sampler!(rula, local_posterior; init_θ, n_steps)
    

LoadError: LoadError: UndefVarError: @profile not defined
in expression starting at In[12]:17

In [2]:


for (i, task_name) in enumerate(String.(keys(task_priors)))
    @info "Task = $(task_name)"

    Random.seed!(i)
    pytask = sbibm.get_task(task_name)
    jltask = JuliaTask(pytask, 1)
    @unpack simulator, prior, s_true, obs_seed = jltask
    
    init_θ = get_starting_params(prior)

    local_posterior = LocalPosterior(;
      simulator, s_true, n_sim, prior,
    )
    
    rula = RiemannianULA(0.5)
    
   
        run_sampler!(rula, local_posterior; init_θ, n_steps)

    
    open("./samples/$(task_name)_$(algorithm).txt", "w") do io
        writedlm(io, data.θ[(n_burn+1):end, :])
    end
                             
    push!(tasks, task_name)
    push!(run_times, time)

end

df = DataFrame(task = tasks, run_time = run_times)
CSV.write("./results/$(algorithm).csv", df)

LoadError: UndefVarError: n_steps not defined