## Imports

In [1]:
# Note this script needs a conda environment with sbibm installed, e.g.:

# using Conda
# using Pkg

# ENV["PYTHON"] = ""
# Pkg.build("PyCall")

# Conda.pip_interop(true)
# Conda.pip("install", "sbibm")

# Make sure up to date
# Pkg.rm("SyntheticLikelihood")
# Pkg.add(url="https://github.com/danielward27/SyntheticLikelihood.jl")
using SyntheticLikelihood
using PyCall
using Distributions
using DelimitedFiles
using Random
using Parameters
using LinearAlgebra
using DataFrames
using CSV

sbibm = pyimport("sbibm")
torch = pyimport("torch");

## Convert stuff from python to julia

In [2]:
# Load the Julia-side priors for the benchmark tasks. The collection is indexed by
# task name elsewhere in this notebook (`task_priors[Symbol(task_name)]`).
# NOTE(review): the contents come from task_priors.jl, which is not visible here;
# presumably a NamedTuple of prior distributions keyed by task name. Confirm.
task_priors = include("task_priors.jl")
# Show the available task names as strings.
String.(keys(task_priors))

("gaussian_linear", "gaussian_linear_uniform", "gaussian_mixture", "sir", "bernoulli_glm")

### Rough test that prior conversion looks right

In [None]:
# Rough sanity check that each Julia prior matches the sbibm (Python) prior of the
# same name, by comparing Monte Carlo estimates of the mean and covariance.
for task_name in String.(keys(task_priors))
    n = 2000  # number of prior draws requested from each implementation

    jl_prior = task_priors[Symbol(task_name)]
    jl_samples = sample_θ(jl_prior, n)
    jl_mean = mean.(eachcol(jl_samples))
    jl_cov = cov(jl_samples)

    py_prior = sbibm.get_task(task_name).get_prior()
    py_samples = py_prior(n).numpy()
    py_mean = mean.(eachcol(py_samples))
    py_cov = cov(py_samples)

    println(task_name)
    println("Julia means = $(round.(jl_mean; digits = 2))")
    println("Python means = $(round.(py_mean; digits = 2)) \n")

    @assert size(py_mean) == size(jl_mean)

    # `isapprox(a, b; rtol = 2)` is true for every pair of finite vectors, because
    # norm(a - b) <= norm(a) + norm(b) <= 2 * max(norm(a), norm(b)); it therefore
    # checked nothing. Instead, require each pair of means to agree to within
    # 5 Monte Carlo standard errors of their difference, which is scale-free and
    # still meaningful for priors centred on zero.
    mean_se = sqrt.(
        diag(jl_cov) ./ size(jl_samples, 1) .+ diag(py_cov) ./ size(py_samples, 1)
    )
    @assert all(abs.(py_mean .- jl_mean) .<= 5 .* mean_se)

    # Covariances are only compared loosely (this is a rough check).
    @assert isapprox(py_cov, jl_cov; rtol = 0.7)
end

## Check the variance ratios between prior and posterior
To get a good idea for the default proposal, we can compare the variance of the posterior to that of the prior on each task.

In [None]:
# Mean, over all tasks, of the per-task posterior-to-prior variance ratio.
# For each task the ratio is the average over parameters of
# (reference-posterior variance) / (prior variance).
mean_ratio = begin
    ratios = Float64[]
    for task_name in String.(keys(task_priors))
        jl_prior = task_priors[Symbol(task_name)]
        prior_var = diag(cov(jl_prior))

        py_task = sbibm.get_task(task_name)
        # The argument 1 is passed straight to sbibm; presumably it selects the
        # first reference observation (TODO confirm against the sbibm docs).
        posterior_samples = py_task.get_reference_posterior_samples(1).numpy()
        posterior_var = diag(cov(posterior_samples))

        ratio = mean(posterior_var ./ prior_var)
        println(task_name, ": ", ratio)
        push!(ratios, ratio)
    end
    mean(ratios)
end

println("The mean variance ratio is $(mean_ratio)")

In [3]:
"""
    get_jl_simulator(task)

Wrap the Python simulator of `task` (obtained from `task.get_simulator()`) as a
Julia function.

The returned function takes a real-valued parameter vector, passes it to the
Python simulator as a `float32` torch tensor, and returns the simulator output
flattened into a `Vector{Float64}`.
"""
function get_jl_simulator(task)
    py_simulator = task.get_simulator()
    function simulator(θ::AbstractVector{<:Real})
        # Materialise as a plain Vector{Float64} (a no-op when θ already is one) so
        # that views, ranges and other element types convert the same way. A separate
        # name is used for the tensor so that `θ` never changes type.
        θ_tensor = torch.tensor(convert(Vector{Float64}, θ), dtype = torch.float32)
        x = py_simulator(θ_tensor)
        return convert(Vector{Float64}, vec(x.numpy()))
    end
    return simulator
end;

In [4]:
"""
    JuliaTask

Julia-side bundle of what is needed to run inference on one benchmark task.

The fields are type parameters so that every instance is concretely typed, while
the positional constructor still accepts values of any type.

# Fields
- `name`: task name.
- `simulator`: callable mapping a parameter vector to simulated data.
- `prior`: prior over the parameters.
- `s_true`: the observation to condition on.
- `obs_seed`: identifier of the observation that `s_true` was loaded from.
"""
struct JuliaTask{N,S,P,O,I}
    name::N
    simulator::S
    prior::P
    s_true::O
    obs_seed::I
end

"""
    JuliaTask(python_task, obs_seed::Integer)

Build a `JuliaTask` from a Python task object.

The simulator is wrapped with `get_jl_simulator`, the prior is looked up in
`task_priors` under the task's name, and the observation returned by
`python_task.get_observation(obs_seed)` is flattened into a `Vector{Float64}`.
"""
function JuliaTask(python_task, obs_seed::Integer)
    name = python_task.name
    simulator = get_jl_simulator(python_task)
    prior = task_priors[Symbol(name)]
    observation = python_task.get_observation(obs_seed).numpy()
    s_true = convert(Vector{Float64}, vec(observation))
    return JuliaTask(name, simulator, prior, s_true, obs_seed)
end;

## Loop through tasks and run the Riemannian ULA algorithm

In [5]:
# Run-length settings shared by the sampler loops in this notebook.
const n_steps = 4_000  # total number of MCMC steps per task
const n_sim = 1_000    # simulations at each MCMC iteration
const n_burn = 1_000;  # number of leading steps treated as burn-in

In [None]:
# Run the Riemannian ULA sampler on every task. For each task the post-burn-in
# samples are written to ./samples/<task>_<algorithm>.txt, and the wall-clock
# sampling times of all tasks are written to ./results/<algorithm>.csv.
algorithm = "rula"
tasks = String[]
run_times = Float64[]

# Create the output directories up front so that a long sampling run is not lost
# to a missing folder when the results are written.
mkpath("./samples")
mkpath("./results")

for (i, task_name) in enumerate(String.(keys(task_priors)))
    @info "Task = $(task_name)"

    # Seed both RNGs: Julia's drives the sampler, torch's drives the Python simulator.
    Random.seed!(i)
    torch.manual_seed(i)

    pytask = sbibm.get_task(task_name)
    jltask = JuliaTask(pytask, 1)

    @unpack simulator, prior, s_true = jltask

    init_θ = sample_θ(prior)

    local_posterior = LocalPosterior(; simulator, s_true, n_sim, prior)

    rula = RiemannianULA(0.2)

    # Only the sampling itself is timed, not the file output.
    elapsed = @elapsed data = run_sampler!(rula, local_posterior; init_θ, n_steps)

    # Drop the first n_burn rows (burn-in) before saving.
    open("./samples/$(task_name)_$(algorithm).txt", "w") do io
        writedlm(io, data.θ[(n_burn + 1):end, :])
    end

    push!(tasks, task_name)
    push!(run_times, elapsed)
end

df = DataFrame(task = tasks, run_time = run_times)
CSV.write("./results/$(algorithm).csv", df)

## Loop through tasks and run basic Bayesian Synthetic Likelihood
Below we use standard synthetic likelihood. We use a burn-in of 1000 "steps" (either accepted or rejected), and then use the empirical covariance matrix of the last 75% of the accepted burn-in samples to form the proposal distribution for the next steps.

In [6]:
# Run random-walk Metropolis with a basic synthetic-likelihood posterior on every
# task. Each task gets a burn-in phase of n_burn steps, whose accepted samples are
# used to tune the proposal covariance, followed by n_steps - n_burn further steps.
# Samples go to ./samples/<task>_<algorithm>.txt; run times and acceptance rates
# go to ./results/<algorithm>.csv.
algorithm = "bsl"
tasks = String[]
run_times = Float64[]
acceptance_rates = Float64[]

# Create the output directories up front so that a long sampling run is not lost
# to a missing folder when the results are written.
mkpath("./samples")
mkpath("./results")

for (i, task_name) in enumerate(String.(keys(task_priors)))
    @info "Task = $(task_name)"

    # Seed both RNGs: Julia's drives the sampler, torch's drives the Python simulator.
    Random.seed!(i)
    torch.manual_seed(i)

    pytask = sbibm.get_task(task_name)
    jltask = JuliaTask(pytask, 1)

    @unpack simulator, prior, s_true = jltask

    init_θ = sample_θ(prior)

    # Burn in for n_burn "steps" using 0.2 * covariance of the prior as proposal.
    rwm = RWMetropolis(MvNormal(0.2 * cov(prior)))
    basic_posterior = BasicPosterior(; simulator, s_true, n_sim, prior)
    time1 = @elapsed data = run_sampler!(
        rwm, basic_posterior;
        init_θ, n_steps = n_burn, collect_data = [:θ, :accepted],
    )

    # Tune the proposal on the last 75% of the accepted burn-in samples. The slice
    # starts at quarter + 1: starting at `quarter` kept one sample too many and
    # indexed row 0 whenever two or fewer samples had been accepted.
    burn_in_θ = data.θ[data.accepted, :]
    n_accepted = size(burn_in_θ, 1)
    quarter = round(Int, 0.25 * n_accepted)
    tail_θ = burn_in_θ[(quarter + 1):end, :]
    size(tail_θ, 1) >= 2 || error(
        "Task $(task_name): only $(n_accepted) accepted burn-in samples; " *
        "cannot estimate a proposal covariance.",
    )
    new_Σ = cov(tail_θ)

    # Actual run. Continue from the last accepted burn-in state instead of
    # restarting from the initial prior draw, so the burn-in is not thrown away.
    start_θ = burn_in_θ[end, :]
    rwm = RWMetropolis(MvNormal(new_Σ))
    basic_posterior = BasicPosterior(; simulator, s_true, n_sim, prior)
    time2 = @elapsed data = run_sampler!(
        rwm, basic_posterior;
        init_θ = start_θ, n_steps = n_steps - n_burn, collect_data = [:θ, :accepted],
    )

    # NOTE(review): only accepted rows are written. If `data.θ` holds the chain
    # state at every step, this drops the repeated states that follow rejections,
    # which a Metropolis sample normally keeps. Confirm this is intended.
    open("./samples/$(task_name)_$(algorithm).txt", "w") do io
        writedlm(io, data.θ[data.accepted, :])
    end

    push!(tasks, task_name)
    push!(run_times, time1 + time2)
    # Fraction of accepted steps in the main run.
    push!(acceptance_rates, mean(data.accepted))
end

df = DataFrame(task = tasks, run_time = run_times, acceptance_rate = acceptance_rates)
CSV.write("./results/$(algorithm).csv", df)

┌ Info: Task = gaussian_linear
└ @ Main In[6]:8
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:04:57[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:14:02[39m
┌ Info: Task = gaussian_linear_uniform
└ @ Main In[6]:8
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:04:38[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:13:52[39m
┌ Info: Task = gaussian_mixture
└ @ Main In[6]:8
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:02:44[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:08:25[39m
┌ Info: Task = sir
└ @ Main In[6]:8
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:37:03[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 1:58:52[39m
┌ Info: Task = bernoulli_glm
└ @ Main In[6]:8
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:05:11[39m
[32mProgress: 100%|███████

"./results/bsl.csv"

In [None]:
#using DelimitedFiles
#using GLM
#X = readdlm("X.txt")
#y = readdlm("y.txt")
#y = reshape(y, length(y))
#glm(X, y, Gamma(), LogLink(), maxiter=1000)