## Imports

In [16]:
import numpy as np
import pandas as pd
import sbibm
import torch
import altair_saver
import tensorflow_probability as tfp

from sbibm.metrics import c2st
from sbibm.visualisation import fig_posterior
from sbibm.metrics import c2st

## Helper function

In [108]:
def thin(X, length_out):
    assert length_out < X.shape[0]
    n = X.shape[0]
    keep = np.round(np.linspace(1, n, num = length_out)) - 1
    keep = keep.astype(int)
    return X[keep, :]

## Posterior plots

In [111]:
task_names = ["gaussian_linear", "gaussian_linear_uniform", "gaussian_mixture", "sir", "bernoulli_glm"]
algorithm_names = ["rula", "bsl"]

plot_n = 1000
for task_name in task_names:
    for algorithm_name in algorithm_names:
        posterior_samples = np.genfromtxt(f"./samples/{task_name}_{algorithm_name}.txt")
        posterior_samples = thin(posterior_samples, plot_n)
        posterior_samples = torch.tensor(posterior_samples)        
        
        n_params = posterior_samples.shape[1] 
        fig_size = 400-35*n_params
        scatter_size = 3.5+0.05*n_params

        fig = fig_posterior(
            task_name=task_name,
            observation=1,
            samples_tensor = posterior_samples,
            num_samples = plot_n,
            config = "streamlit",
            height = fig_size,
            width = fig_size,
            scatter_size = scatter_size,
            samples_name = algorithm_name
        )
        altair_saver.save(fig, f"./plots/{task_name}_{algorithm_name}.html")

    

(3000, 10)
(477, 10)


AssertionError: 

## Performance metrics
We will compute the classification accuracy twice. Once on all the samples (3000), then once on a thinned sample of 300 points. The size of the reference is matched to the size of the data to make sure the neural network did not have issues with the biased class sizes.

In [114]:
metrics = {
    "task": [],
    "algorithm":  [],
    "min_ess": [],
    "mean_ess": [],
    "max_ess": [],
    "c2st_all": [],
    "c2st_thinned": []
}

for task_name in task_names:
    for algoritm_name in algorithms:
        print(f"{task_name}: {algoritm_name}")
        posterior_samples = torch.tensor(np.genfromtxt(f"./samples/{task_name}_{algoritm_name}.txt"))
        task = sbibm.get_task(task_name)
        reference_samples = task.get_reference_posterior_samples(num_observation=1)
        
        ess = tfp.mcmc.effective_sample_size(posterior_samples)

        X = posterior_samples
        Y = reference_samples[1:X.shape[0], :]
        c2st_all = c2st(X, Y)
        
        X = thin(posterior_samples, 300)
        Y = reference_samples[1:X.shape[0], :]
        c2st_thinned = c2st(X, Y)        
        
        results["task"].append(task_name)
        results["algorithm"].append(algoritm_name)
        results["min_ess"].append(np.min(ess))
        results["mean_ess"].append(np.mean(ess))
        results["max_ess"].append(np.max(ess))
        results["c2st_all"].append(c2st_all)
        results["c2st_thinned"].append(c2st_thinned)



gaussian_linear: rula
gaussian_linear: bsl




gaussian_linear_uniform: rula
gaussian_linear_uniform: bsl




gaussian_mixture: rula




gaussian_mixture: bsl




sir: rula




sir: bsl
bernoulli_glm: rula




bernoulli_glm: bsl




In [97]:
df = pd.Dataframe(metrics)
df.to_csv("results/metrics.csv")