# Bayesian Model Comparison on Gaussian Processes

This notebook demonstrates how to perform Bayesian model comparison with the `compass` package, using Gaussian processes as an example.

In [None]:
import os
# Expose only GPUs 8 and 9 to this process. The variable is set here, before any
# later cell imports `torch`.
# NOTE(review): the indices "8,9" are specific to the author's machine - adjust
# them to the GPUs available on your system.
os.environ['CUDA_VISIBLE_DEVICES'] = "8,9"

In [None]:
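# Alternative (presumably for an installed `compass` package): uncomment these
# imports and skip the local-checkout import in the next cell.
#from compass import ScoreBasedInferenceModel as SBIm
#from compass import ModelTransfuser as MTf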

In [None]:
# NOTE(review): hardcoded absolute path to the author's COMPASS checkout - replace
# it with the location of your own checkout. The working directory is switched to
# the repository root, presumably so that `src.compass` can be imported from the
# local source tree rather than from an installed package.
os.chdir('/export/home/bguenes/COMPASS/')
# Short aliases used throughout the notebook: SBIm (single model) and MTf (model collection).
from src.compass import ScoreBasedInferenceModel as SBIm
from src.compass import ModelTransfuser as MTf

In [None]:
# NOTE(review): hardcoded absolute path to the author's checkout - replace it with
# the `tutorials` directory of your own COMPASS checkout. Relative paths used later
# in this notebook (e.g. "data/tutorial_Gaussians") resolve against this directory.
os.chdir('/export/home/bguenes/COMPASS/tutorials')
import torch

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Seed torch's global RNG so the synthetic data sets drawn with `torch.randn` in the
# cells below are identical on every "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED);  # trailing ';' hides the returned Generator repr in the cell output

## Hypothesis 1
$$ \begin{align*}
    \theta_1 &\sim \mathcal{N}(0, 3^2) \\
    x_1 &\sim \mathcal{N}(2 \cdot \sin(\theta_1), 0.5^2) \\
    x_2 &\sim \mathcal{N}(0.1 \cdot \theta_1^2, (0.5 \cdot |x_1|)^2)
\end{align*} $$

In [None]:
def gen_data_hyp1(int):
    """Simulate parameter/observation pairs under Hypothesis 1 (sine-shaped mean of x_1).

    Args:
        int: Number of samples to draw. The name shadows the builtin ``int``; it is
            kept unchanged so existing calls keep working.

    Returns:
        A tuple ``(theta, x)`` where ``theta`` has shape ``(int, 1)`` and ``x`` has
        shape ``(int, 2)`` with columns ``x_1`` and ``x_2``.
    """
    n_samples = int

    # Prior: theta_1 ~ N(0, 3^2)
    theta = torch.randn(n_samples) * 3

    # x_1 ~ N(2 * sin(theta_1), 0.5^2)
    obs_1 = torch.sin(theta) * 2 + 0.5 * torch.randn(n_samples)

    # x_2 ~ N(0.1 * theta_1^2, (0.5 * |x_1|)^2): the noise scale depends on x_1
    obs_2_scale = 0.5 * obs_1.abs()
    obs_2 = 0.1 * theta**2 + obs_2_scale * torch.randn(n_samples)

    observations = torch.stack((obs_1, obs_2), dim=-1)
    return theta[:, None], observations

In [None]:
# Simulate a large data set (100,000 samples) and a smaller `val_` set (1,000 samples)
# under Hypothesis 1. Note that `x1` here is the full (N, 2) observation matrix of
# Hypothesis 1, not the scalar x_1 from the equations above.
theta1, x1 = gen_data_hyp1(100_000)
val_theta1, val_x1 = gen_data_hyp1(1_000)

# Total number of columns across parameters and observations (1 + 2 = 3 here).
# NOTE(review): `nodes_max` is not referenced by any other visible cell.
nodes_max = theta1.shape[1] + x1.shape[1]

## Hypothesis 2
$$ \begin{align*}
    \theta_1 &\sim \mathcal{N}(0, 3^2) \\
    x_1 &\sim \mathcal{N}(2 \cdot \cos(\theta_1), 0.5^2) \\
    x_2 &\sim \mathcal{N}(0.1 \cdot \theta_1^2, (0.5 \cdot |x_1|)^2)
\end{align*} $$

In [None]:
def gen_data_hyp2(int):
    """Simulate parameter/observation pairs under Hypothesis 2 (cosine-shaped mean of x_1).

    Args:
        int: Number of samples to draw. The name shadows the builtin ``int``; it is
            kept unchanged so existing calls keep working.

    Returns:
        A tuple ``(theta, x)`` where ``theta`` has shape ``(int, 1)`` and ``x`` has
        shape ``(int, 2)`` with columns ``x_1`` and ``x_2``.
    """
    n_samples = int

    # Prior: theta_1 ~ N(0, 3^2)
    theta = torch.randn(n_samples) * 3

    # x_1 ~ N(2 * cos(theta_1), 0.5^2)
    obs_1 = torch.cos(theta) * 2 + 0.5 * torch.randn(n_samples)

    # x_2 ~ N(0.1 * theta_1^2, (0.5 * |x_1|)^2): the noise scale depends on x_1
    obs_2_scale = 0.5 * obs_1.abs()
    obs_2 = 0.1 * theta**2 + obs_2_scale * torch.randn(n_samples)

    observations = torch.stack((obs_1, obs_2), dim=-1)
    return theta[:, None], observations

In [None]:
# Simulate a large data set (100,000 samples) and a smaller `val_` set (1,000 samples)
# under Hypothesis 2. `x2` is the full (N, 2) observation matrix of Hypothesis 2,
# not the scalar x_2 from the equations.
theta2, x2 = gen_data_hyp2(100_000)
val_theta2, val_x2 = gen_data_hyp2(1_000)

## Hypothesis 3
$$ \begin{align*}
    \theta_1 &\sim \mathcal{N}(0, 3^2) \\
    x_1 &\sim \mathcal{N}(0, 1^2) \\
    x_2 &\sim |\mathcal{N}(0, 2^2)|
\end{align*} $$

In [None]:
def gen_data_hyp3(int):
    """Simulate parameter/observation pairs under Hypothesis 3 (observations independent of theta).

    Args:
        int: Number of samples to draw. The name shadows the builtin ``int``; it is
            kept unchanged so existing calls keep working.

    Returns:
        A tuple ``(theta, x)`` where ``theta`` has shape ``(int, 1)`` and ``x`` has
        shape ``(int, 2)`` with columns ``x_1`` and ``x_2``.
    """
    n_samples = int

    # Prior: theta_1 ~ N(0, 3^2)
    theta = torch.randn(n_samples) * 3

    # x_1 ~ N(0, 1), drawn without reference to theta_1
    obs_1 = torch.randn(n_samples)

    # x_2 ~ |N(0, 2^2)|: absolute value of a standard normal draw, scaled by 2
    obs_2 = torch.randn(n_samples).abs() * 2

    observations = torch.stack((obs_1, obs_2), dim=-1)
    return theta[:, None], observations

In [None]:
# Simulate a large data set (100,000 samples) and a smaller `val_` set (1,000 samples)
# under Hypothesis 3. `x3` is the (N, 2) observation matrix of Hypothesis 3.
theta3, x3 = gen_data_hyp3(100_000)
val_theta3, val_x3 = gen_data_hyp3(1_000)

In [None]:
# Visual sanity check: pair plot of the 1,000-sample `val_` sets of all three hypotheses.
pairplot_columns = ['theta', 'x1', 'x2']
validation_sets = {
    "hyp1": (val_theta1, val_x1),
    "hyp2": (val_theta2, val_x2),
    "hyp3": (val_theta3, val_x3),
}

# One frame per hypothesis: [theta | x_1 | x_2] columns plus a label column used for colouring.
df1, df2, df3 = (
    pd.DataFrame(
        torch.concatenate([theta, x], dim=1).numpy(),
        columns=pairplot_columns,
    ).assign(hypothesis=label)
    for label, (theta, x) in validation_sets.items()
)

combined_df = pd.concat([df1, df2, df3], axis=0)

# Small, semi-transparent markers keep the 3,000 overlapping points readable.
sns.pairplot(
    combined_df,
    hue='hypothesis',
    diag_kind='kde',
    plot_kws={"alpha": 0.3, "s": 2},
)
plt.show()

In [None]:
# Create the ModelTransfuser that will hold one model per hypothesis.
# `path` is relative to the current working directory; presumably trained models are
# stored there (the optional loading cell further down reads `.pt` files from this
# directory) - confirm against the compass documentation.
mtf = MTf(path="data/tutorial_Gaussians")

In [None]:
# Register the simulated data of each hypothesis under its own name: the large
# (theta, x) set followed by the smaller `val_` set drawn from the same simulator.
mtf.add_data("Hypothesis 1", theta1, x1, val_theta1, val_x1)
mtf.add_data("Hypothesis 2", theta2, x2, val_theta2, val_x2)
mtf.add_data("Hypothesis 3", theta3, x3, val_theta3, val_x3)

In [None]:
# Initialise the models with a shared set of hyperparameters.
# NOTE(review): the meaning of these settings is defined by compass; `hidden_size` (20)
# is a multiple of `num_heads` (5), which is presumably required - confirm before changing either.
mtf.init_models(sde_type="vesde", sigma=3, depth=4, num_heads=5, hidden_size=20, mlp_ratio=4)

In [None]:
# Train the models on the registered data.
# NOTE(review): presumably the most expensive cell of the notebook; the commented-out
# cell below shows how to load saved models instead.
mtf.train_models(batch_size=256)

In [None]:
# Optional: load pretrained models from disk instead of training them in the cell above (uncomment to use)

# sbim1 = SBIm.load("data/tutorial_Gaussians/Hypothesis 1.pt", device="cuda")
# sbim2 = SBIm.load("data/tutorial_Gaussians/Hypothesis 2.pt", device="cuda")
# sbim3 = SBIm.load("data/tutorial_Gaussians/Hypothesis 3.pt", device="cuda")

# mtf.add_model("Hypothesis 1", sbim1)
# mtf.add_model("Hypothesis 2", sbim2)
# mtf.add_model("Hypothesis 3", sbim3)

In [None]:
# Create test data: 100 fresh samples simulated under Hypothesis 1.
# NOTE(review): `test_theta1` is not used in any visible cell; only the observations
# `test_x1` are passed to the comparison.
test_theta1, test_x1 = gen_data_hyp1(100)

# Compare models on test data.
# NOTE(review): `device="cuda"` is hardcoded, so this cell presumably requires a GPU.
# The sampler settings (`timesteps`, `method`, `order`) are passed straight to compass;
# see its documentation for their meaning.
mtf.compare(x=test_x1, device="cuda", timesteps=500, method="dpm", order=1)

In [None]:
# Plot the outcome of the `mtf.compare` call from the previous cell.
mtf.plot_comparison()

In [None]:
# Plot the attention of the models, with one LaTeX label per variable in the order
# theta, x_1, x_2 (presumably matching the [theta, x] column order of the data - confirm).
mtf.plot_attention(labels=[r"$\theta$", r"$x_1$", r"$x_2$"])