In [19]:
import numpy as np
import pandas as pd
from scipy.special import expit, logit

import bayesflow as bf

import keras

In [2]:
N_TIME_POINTS = 5

class Simulator:
    def __init__(self, sigma_noise=0.1):
        """
        Simulator for the hierarchical AR(1) model:
            y[t] = alpha + beta y[t-1] + noise[t]
        starting from an initial value.

        Parameters:
            sigma_noise (float): noise standard deviation.
        """
        self.sigma_noise = sigma_noise
        self.initial_is_zero = False

    def __call__(self, params, n_time_points=N_TIME_POINTS):
        eta = np.array(params['eta'])
        alpha = np.array(params['alpha'])
        N = eta.size
        if eta.ndim > 1:
            raise ValueError("eta must be a 1D array.")

        # Generate noise for the increments: shape (N, n_time_points)
        noise = np.random.normal(
            loc=0,
            scale=self.sigma_noise,
            size=(N, n_time_points)
        )

        # Initialize trajectories with the initial condition
        traj = np.zeros((N, n_time_points))

        # Simulate the AR(1) process for each trajectory and each batch
        if not self.initial_is_zero:
            traj[:, 0] = noise[:, 0]
        for t in range(1, n_time_points):
            traj[:, t] = alpha + traj[:, t - 1] * eta + noise[:, t]

        return dict(observable=traj)


class Prior:
    def __init__(self):
        """
        Hierarchical prior for the AR(1) model.
        """
        self.alpha_mean = 0
        self.alpha_std = 1
        self.beta_mean = 0
        self.beta_std = 1 #0.1
        self.log_sigma_mean = 0 #np.log(0.1)
        self.log_sigma_std = 1 #0.5
        self.n_params_global = 3
        self.n_params_local = 1
        self.global_param_names = [r'$\alpha$', r'$\beta$', r'$\log \sigma$']

        # Build prior parameters as tensors.
        self.hyper_prior_means = np.array(
            [self.alpha_mean,
             self.beta_mean,
             self.log_sigma_mean],
        )
        self.hyper_prior_stds = np.array(
            [self.alpha_std,
             self.beta_std,
             self.log_sigma_std],
        )

        np.random.seed(0)
        self.simulator = Simulator()

        # Compute normalization constants
        test = self.sample(1000)
        self.norm_x_mean = np.mean(test['data'])
        self.norm_x_std = np.std(test['data'])
        self.norm_prior_global_mean = np.mean(test['global_params'], axis=0)
        self.norm_prior_global_std = np.std(test['global_params'], axis=0)
        self.norm_prior_local_mean = np.mean(test['local_params_raw'], axis=0)
        self.norm_prior_local_std = np.std(test['local_params_raw'], axis=0)

    def __call__(self, batch_size):
        return self.sample(batch_size=batch_size)

    @staticmethod
    def get_local_param_names(n_local_samples):
        return [r'$\eta_{' + str(i) + '}$' for i in range(n_local_samples)]

    def _sample_global(self):
        # Sample global parameters
        self.alpha = np.random.normal(loc=self.alpha_mean, scale=self.alpha_std)
        self.beta = np.random.normal(loc=self.beta_mean, scale=self.beta_std)
        self.log_sigma = np.random.normal(loc=self.log_sigma_mean, scale=self.log_sigma_std)
        return dict(alpha=self.alpha, beta=self.beta, log_sigma=self.log_sigma)

    def _sample_local(self, n_local_samples=1):
        # Sample local parameters
        eta_raw = np.random.normal(loc=0, scale=np.exp(self.log_sigma), size=n_local_samples)
        eta = self.transform_local_params(beta=self.beta, eta_raw=eta_raw)
        return dict(eta=eta, eta_raw=eta_raw)

    @staticmethod
    def transform_local_params(beta, eta_raw):
        # transform raw local parameters
        return 2*expit(beta + eta_raw)-1

    @staticmethod
    def back_transform_local_params(local_params):
        local_params_raw = logit((local_params + 1) / 2)
        local_params_raw[local_params_raw < -100] = -100
        local_params_raw[local_params_raw > 100] = 100
        return local_params_raw

    def sample(self, batch_size, n_local_samples=1, n_time_points=N_TIME_POINTS, get_grid=False):
        # Sample global and local parameters and simulate data
        global_params = np.zeros((batch_size, self.n_params_global))
        local_params_raw = np.zeros((batch_size, n_local_samples))
        local_params = np.zeros((batch_size, n_local_samples))
        data = np.zeros((batch_size, n_local_samples, n_time_points))

        for i in range(batch_size):
            global_sample = self._sample_global()
            local_sample = self._sample_local(n_local_samples=n_local_samples)
            sim_dict = {'alpha': global_sample['alpha'], 'eta': local_sample['eta']}
            sim = self.simulator(sim_dict, n_time_points=n_time_points)

            global_params[i] = [global_sample['alpha'], global_sample['beta'], global_sample['log_sigma']]
            local_params_raw[i] = local_sample['eta_raw']
            local_params[i] = local_sample['eta']
            data[i] = sim['observable']

        # Convert to tensors
        global_params = np.array(global_params)
        local_params = np.array(local_params)
        local_params_raw = np.array(local_params_raw)

        data = np.array(data)
        if get_grid:
            grid_size = int(np.sqrt(n_local_samples))
            data = data[:, :grid_size ** 2]
            data = data.reshape(batch_size, n_time_points, grid_size, grid_size)
            local_params = local_params[:, :grid_size ** 2]
            local_params_raw = local_params_raw[:, :grid_size ** 2]
            local_params = local_params.reshape(batch_size, grid_size, grid_size)
            local_params_raw = local_params_raw.reshape(batch_size, grid_size, grid_size)
        return dict(global_params=global_params, local_params=local_params,
                    local_params_raw=local_params_raw, data=data)

    def normalize_theta(self, theta, global_params):
        if global_params:
            return (theta - self.norm_prior_global_mean) / self.norm_prior_global_std
        return (theta - self.norm_prior_local_mean) / self.norm_prior_local_std

    def denormalize_theta(self, theta, global_params):
        if global_params:
            return theta * self.norm_prior_global_std + self.norm_prior_global_mean
        return theta * self.norm_prior_local_std + self.norm_prior_local_mean

    def normalize_data(self, x):
        return (x - self.norm_x_mean) / self.norm_x_std

## Experiment 1: 4 x 4 Grids

In [3]:
prior = Prior()
sim = Simulator()

# 10000 data points (same as compositional)

train_dict = prior.sample(
    batch_size=625,
    n_local_samples=16,
    n_time_points=5
)

train_dict['global_params'] = prior.normalize_theta(train_dict['global_params'], global_params=True)
train_dict['data'] = prior.normalize_data(train_dict['data'])

val_dict = prior.sample(
    batch_size=300,
    n_local_samples=16,
    n_time_points=5
)

val_dict['global_params'] = prior.normalize_theta(val_dict['global_params'], global_params=True)
val_dict['data'] = prior.normalize_data(val_dict['data'])

In [4]:
n_post_samples = 100

test_dict = prior.sample(
    batch_size=100,
    n_local_samples=16,
    n_time_points=5
)

test_dict['global_params'] = prior.normalize_theta(test_dict['global_params'], global_params=True)
test_dict['data'] = prior.normalize_data(test_dict['data'])

In [5]:
adapter = (
    bf.adapters.Adapter()
    .to_array()
    .convert_dtype("float64", "float32")
    .drop(["local_params", "local_params_raw"])
    .rename("global_params", "inference_variables")
    .rename("data", "summary_variables")
)

In [7]:
models = {
    "fm": (bf.networks.FlowMatching, {}),
    "coupling": (bf.networks.CouplingFlow, {
        "transform": "spline",
        "depth": 2
    }),
    "dm_cosine_F": (bf.networks.DiffusionModel, {
        "noise_schedule": "cosine", 
        "prediction_type": "F", }),
    "dm_cosine_v": (bf.networks.DiffusionModel, {
        "noise_schedule": "cosine", 
        "prediction_type": "velocity"}),   
    "dm_cosine_noise": (bf.networks.DiffusionModel, {
        "noise_schedule": "cosine", 
        "prediction_type": "noise"}),
}

In [None]:
results = {}

for model_name, model_packet in models.items():

    results[model_name] = None

    workflow_global = bf.BasicWorkflow(
        adapter=adapter,
        summary_network=bf.networks.DeepSet(summary_dim=5, dropout=0.1, depth=1),
        inference_network=model_packet[0](**model_packet[1]),
        checkpoint_filepath=f"bf_checkpoints/{model_name}_16"
    )

    history = workflow_global.fit_offline(
        train_dict, batch_size=32, epochs=500 if model_name != "coupling" else 100, validation_data=test_dict)

    test_global_samples = workflow_global.sample(conditions=test_dict, num_samples=n_post_samples)

    results[model_name] = {
        "RMSE": bf.diagnostics.metrics.root_mean_squared_error(test_global_samples, test_dict)['values'].mean(),
        "ECE": bf.diagnostics.metrics.calibration_error(test_global_samples, test_dict)['values'].mean(),
        "PC": bf.diagnostics.metrics.posterior_contraction(test_global_samples, test_dict)['values'].mean()
    }

## Experiment on 16 x 16 Grids

In [12]:
# 10000 data points (same as compositional)

train_dict = prior.sample(
    batch_size=40,
    n_local_samples=16 * 16,
    n_time_points=5
)

train_dict['global_params'] = prior.normalize_theta(train_dict['global_params'], global_params=True)
train_dict['data'] = prior.normalize_data(train_dict['data'])

val_dict = prior.sample(
    batch_size=300,
    n_local_samples=16 * 16,
    n_time_points=5
)

val_dict['global_params'] = prior.normalize_theta(val_dict['global_params'], global_params=True)
val_dict['data'] = prior.normalize_data(val_dict['data'])

In [None]:
larger_results = {}

for model_name, model_packet in models.items():

    larger_results[model_name] = None

    workflow_global = bf.BasicWorkflow(
        adapter=adapter,
        summary_network=bf.networks.DeepSet(summary_dim=5, dropout=0.1, depth=1),
        inference_network=model_packet[0](**model_packet[1]),
        checkpoint_filepath=f"bf_checkpoints/{model_name}_256"
    )

    history = workflow_global.fit_offline(
        train_dict, batch_size=16, epochs=200 if model_name != "coupling" else 100, validation_data=test_dict)

    test_global_samples = workflow_global.sample(conditions=test_dict, num_samples=n_post_samples)

    larger_results[model_name] = {
        "RMSE": bf.diagnostics.metrics.root_mean_squared_error(test_global_samples, test_dict)['values'].mean(),
        "ECE": bf.diagnostics.metrics.calibration_error(test_global_samples, test_dict)['values'].mean(),
        "PC": bf.diagnostics.metrics.posterior_contraction(test_global_samples, test_dict)['values'].mean()
    }

In [16]:
# Add our results (from paper)

results["Ours"] = {
    "RMSE": 0.09,
    "PC": 0.97
}

larger_results["Ours"] = {
    "RMSE": 0.08,
    "PC": 1.0
}

In [17]:
all_results = {
    "4x4": results,
    "16x16": larger_results
}

In [34]:
# ---- build a compact table: size Ã— method ----
rows = {}

for size, methods in all_results.items():
    row = {}
    for method, metrics in methods.items():
        rmse = metrics['RMSE']
        pc = metrics['PC']
        row[method] = f"{rmse:.4f} | {pc:.3f}"
    rows[size] = row

df = pd.DataFrame.from_dict(rows, orient='index')
df.index.name = "size"

In [35]:
df

Unnamed: 0_level_0,fm,coupling,dm_cosine_F,dm_cosine_v,dm_cosine_noise,Ours
size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4x4,0.0900 | 0.929,0.1082 | 0.865,0.0981 | 0.905,0.0952 | 0.925,0.1017 | 0.899,0.0900 | 0.970
16x16,0.1966 | 0.497,0.1954 | 0.541,0.2347 | 0.309,0.2184 | 0.458,0.6158 | 0.000,0.0800 | 1.000


In [22]:
df.to_csv("bf_results/metrics.csv", sep=";", index=True, header=True)

### Larger budget (64 x 64)

In [23]:
train_dict = prior.sample(
    batch_size=400,
    n_local_samples=16 * 16,
    n_time_points=5
)

train_dict['global_params'] = prior.normalize_theta(train_dict['global_params'], global_params=True)
train_dict['data'] = prior.normalize_data(train_dict['data'])

val_dict = prior.sample(
    batch_size=300,
    n_local_samples=16 * 16,
    n_time_points=5
)

val_dict['global_params'] = prior.normalize_theta(val_dict['global_params'], global_params=True)
val_dict['data'] = prior.normalize_data(val_dict['data'])

In [None]:
larger_results2 = {}

for model_name, model_packet in models.items():

    larger_results2[model_name] = None

    workflow_global = bf.BasicWorkflow(
        adapter=adapter,
        summary_network=bf.networks.DeepSet(summary_dim=5, dropout=0.1, depth=1),
        inference_network=model_packet[0](**model_packet[1]),
        checkpoint_filepath=f"bf_checkpoints/{model_name}_256_bigS"
    )

    history = workflow_global.fit_offline(
        train_dict, batch_size=16, epochs=200 if model_name != "coupling" else 100, validation_data=test_dict)

    test_global_samples = workflow_global.sample(conditions=test_dict, num_samples=n_post_samples)

    larger_results2[model_name] = {
        "RMSE": bf.diagnostics.metrics.root_mean_squared_error(test_global_samples, test_dict)['values'].mean(),
        "ECE": bf.diagnostics.metrics.calibration_error(test_global_samples, test_dict)['values'].mean(),
        "PC": bf.diagnostics.metrics.posterior_contraction(test_global_samples, test_dict)['values'].mean()
    }

In [25]:
larger_results2["Ours"] = {
    "RMSE": 0.08,
    "PC": 1.0
}

In [26]:
all_results2 = {
    "4x4": results,
    "16x16": larger_results,
    "16x16 (x10)": larger_results2
}

In [42]:
def round_sig(x, sig=2):
    return float(f"{x:.2g}")

rows = []

for size, methods in all_results2.items():
    row = {"size": size}
    for method, metrics in methods.items():
        rmse = round_sig(float(metrics["RMSE"]))
        pc   = round_sig(float(metrics["PC"]))
        row[(method, "RMSE")] = rmse
        row[(method, "PC")]   = pc
    rows.append(row)

df = pd.DataFrame(rows).set_index("size")
df.columns = pd.MultiIndex.from_tuples(df.columns)


In [45]:
# df.to_csv("bf_results/metrics.csv", sep=";", index=True, header=True)