# Save and load data
Use a prior and a simulator to generate a dataset. Save one portion of it as a training set and another portion as a validation set.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# Hide the top and right spines on all axes created after this point.
matplotlib.rcParams.update({
    "axes.spines.right": False,
    "axes.spines.top": False,
})

In [2]:
import sbi
from sbi.inference import SNPE
from sbi.inference.base import infer
from sbi.analysis import pairplot
import torch

In [3]:
# Extend the module search path so that the repository's `src` package can be
# imported from this notebook. The relative entry '..' is resolved against the
# kernel's working directory, so this assumes the notebook is run from a
# directory one level below the repository root — TODO confirm.
import sys
sys.path.append('..')
# NOTE(review): after this line the name `io` refers to the project module
# src.scripts.io, not to the standard-library `io` module.
from src.scripts import io

In [4]:
def simulator(thetas, seed=None):
    """Simulate noisy straight lines ``y = m * x + b + noise`` on a fixed grid.

    The x grid is ``np.linspace(0, 100, 101)`` and the noise is Gaussian with
    mean 0 and a constant standard deviation of 5, drawn independently for
    every grid point and every parameter set.

    Parameters
    ----------
    thetas : array-like or torch.Tensor
        Either a single parameter set ``[m, b]`` or a batch of shape
        ``(n, 2)`` with the slope ``m`` in column 0 and the intercept ``b``
        in column 1.
    seed : int or None, optional
        Seed handed to ``np.random.RandomState``. The default ``None`` draws
        fresh entropy on every call, which is what happened before this
        parameter existed.

    Returns
    -------
    torch.Tensor
        Tensor of shape ``(n, 101)``; row ``i`` is the noisy line generated
        from ``thetas[i]``.

    Raises
    ------
    ValueError
        If ``thetas`` cannot be interpreted as an ``(n, 2)`` array.
    """
    # numpy cannot convert tensors that track gradients or live on another
    # device, so detach and move them to the CPU before converting.
    if isinstance(thetas, torch.Tensor):
        thetas = thetas.detach().cpu().numpy()
    thetas = np.atleast_2d(np.asarray(thetas, dtype=float))

    # Check if the input has the correct shape
    if thetas.ndim != 2 or thetas.shape[1] != 2:
        raise ValueError("Input tensor must have shape (n, 2) where n is the number of parameter sets.")

    m, b = thetas[:, 0], thetas[:, 1]
    x = np.linspace(0, 100, 101)
    rs = np.random.RandomState(seed)

    # sigma could become a function of x later on; for now it is constant.
    sigma = 5
    noise = rs.normal(loc=0, scale=sigma, size=(len(x), thetas.shape[0]))

    # Broadcast the (101, 1) grid against the (n,) slopes and intercepts:
    # column i of y is m[i] * x + b[i] + noise[:, i].
    y = x[:, None] * m + b + noise

    # Transpose so that each row corresponds to one parameter set.
    return torch.Tensor(y.T)

In [5]:
# presumably the number of simulator parameters (slope m, intercept b);
# not referenced again in the cells visible here
num_dim = 2

# Lower and upper edges of the box prior. The entries follow the column order
# that simulator() reads from its input: [m, b].
low_bounds = torch.tensor([0, -10])
high_bounds = torch.tensor([10, 10])

# Prior over (m, b) constructed from the bounds above.
prior = sbi.utils.BoxUniform(low = low_bounds, high = high_bounds)

Now draw parameter samples from this prior and run them through the simulator.

In [13]:
# Draw 5 parameter sets from the prior and simulate one noisy line for each.
params = prior.sample((5,))
xs = simulator(params)
# NOTE: print() writes the `$\theta$` markup literally; it is not rendered as
# LaTeX in the cell's text output.
print(r'$\theta$s', params, 'xs', xs)

$\theta$s tensor([[ 8.4720, -8.7245],
        [ 6.2986, -6.3061],
        [ 7.6696,  5.8846],
        [ 8.2143, -0.9697],
        [ 9.9554,  0.1756]]) xs tensor([[-9.2602e+00, -8.7352e-01,  2.0086e+01,  2.0491e+01,  1.7147e+01,
          3.1577e+01,  3.9759e+01,  4.9516e+01,  6.4271e+01,  7.1316e+01,
          7.5384e+01,  7.9887e+01,  9.3959e+01,  9.4245e+01,  1.0534e+02,
          1.2816e+02,  1.2935e+02,  1.3130e+02,  1.5088e+02,  1.5659e+02,
          1.5348e+02,  1.7466e+02,  1.8086e+02,  1.8679e+02,  1.9889e+02,
          1.9648e+02,  2.1448e+02,  2.1740e+02,  2.2847e+02,  2.2996e+02,
          2.4263e+02,  2.5454e+02,  2.5783e+02,  2.7095e+02,  2.7771e+02,
          2.8781e+02,  2.9804e+02,  3.0758e+02,  3.1737e+02,  3.1386e+02,
          3.2903e+02,  3.3053e+02,  3.4557e+02,  3.4054e+02,  3.6659e+02,
          3.7680e+02,  3.8313e+02,  3.9038e+02,  3.9683e+02,  4.0335e+02,
          4.2151e+02,  4.2710e+02,  4.3236e+02,  4.4121e+02,  4.5243e+02,
          4.6345e+02,  4.6805e+0

In [None]:
# generate a "true" dataset from one known parameter set: m = 1, b = 5
theta_true = [1, 5]
y_true = simulator(theta_true)

# and visualize it on an explicit figure/axes pair instead of clearing
# pyplot's implicit current figure
fig, ax = plt.subplots()
# The x grid must match the one used inside simulator(). y_true has shape
# (1, 101), so flatten it to pair each point with its x value.
ax.scatter(np.linspace(0, 100, 101),
           np.asarray(y_true).ravel(), color = 'black')
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()