In [3]:
import numpy as np
import torch
from sbi.utils import BoxUniform
from sbi.inference import SNPE, simulate_for_sbi

from sbi_for_diffusion_models.choice_model import choice_model_simulator_torch

# Train on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("Training device:", device)

# Per-dimension lower/upper bounds of the 5-parameter box prior. They are
# created without an explicit device, i.e. on torch's default device.
low = torch.tensor([0.0, 0.0, -2.0, 0.2, 0.0], dtype=torch.float32)
high = torch.tensor([1.0, 5.0, 2.0, 3.0, 1.0], dtype=torch.float32)

# 1) Simulation-side prior, built from the bounds above so that
#    simulate_for_sbi draws theta on the same (CPU) device as the simulator.
prior_sim = BoxUniform(low=low, high=high)

# Seeded NumPy generator that is handed to the simulator on every call.
rng = np.random.default_rng(seed=0)

def simulator(th: torch.Tensor) -> torch.Tensor:
    """Run the choice-model simulator on the parameters ``th``.

    Thin adapter around ``choice_model_simulator_torch``: ``th`` is forwarded
    unchanged, the module-level ``rng`` is supplied as the random generator,
    and ``resample_invalid=True`` is always requested.

    The simulation is meant to stay on the CPU; ``simulate_for_sbi`` is
    expected to pass CPU tensors because ``prior_sim`` lives on the CPU.
    """
    simulated = choice_model_simulator_torch(
        th,
        rng=rng,
        resample_invalid=True,
    )
    return simulated

# Draw 10,000 parameter sets from the simulation prior and run them through
# `simulator` in batches of 2048, yielding the (theta, x) training pairs.
theta, x = simulate_for_sbi(
    simulator,
    prior_sim,
    num_workers=1,  # Windows-friendly
    simulation_batch_size=2048,
    num_simulations=10_000,
)

# 2) Prior for training. sbi expects the prior to live on the training
#    device, so rebuild it with the bounds moved to `device`.
prior_train = BoxUniform(low=low.to(device), high=high.to(device))

# 3) Train on `device`, but keep the simulated data stored on the CPU
#    (recommended: saves GPU memory, and also works when `device` fell back
#    to the CPU because no CUDA device is available).
inference = SNPE(prior=prior_train, device=str(device))
density_estimator = inference.append_simulations(
    theta,
    x,
    data_device="cpu",
).train(
    training_batch_size=4096,
)
posterior = inference.build_posterior(density_estimator)

# Sanity check: report where the trained network's parameters ended up.
print("Density estimator device:", next(density_estimator.parameters()).device)


Training device: cuda:0


Running 10000 simulations.:   0%|          | 0/10000 [00:00<?, ?it/s]



 Neural network successfully converged after 283 epochs.Density estimator device: cuda:0
