# In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import tensorflow as tf
from tensorflow import keras

# Target density: a frozen Beta distribution with shape parameters (2, 5).
true_dist = stats.beta(2, 5)

# Observed data: 1000 independent draws from the target density.
x_obs = true_dist.rvs(size=1_000)

# Variational network: a small MLP that maps each scalar input to two raw,
# unconstrained outputs.
#
# The model is built exactly once, at module level. Building it inside a
# function would hand every call a freshly initialised set of weights (so
# nothing could be learned) and would leave `q.trainable_variables`, which
# the training loop reads, undefined. `q(x)` remains callable as before.
#
# NOTE(review): the original comment described this as "a Gaussian with
# learnable parameters", but nothing here constrains the two outputs to be a
# mean/scale pair -- confirm how they are meant to be interpreted.
q = keras.Sequential([
    # One scalar feature per sample, i.e. inputs are expected as (N, 1).
    # Declaring the input also creates the weights up front.
    keras.Input(shape=(1,)),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(2),
])

# Define the training objective (labelled "ELBO" by the original author)
def elbo(x_obs, q):
    """Return the scalar objective that the training loop maximises.

    Args:
        x_obs: Observed samples. A 1-D array is treated as N scalar samples
            and is passed to ``q`` as an (N, 1) column.
        q: Callable applied to the float32 samples; its result is indexed
            as ``[:, 0]`` and ``[:, 1]``, so it must have at least two columns.

    Returns:
        A scalar tensor, ``log_likelihood - kl_divergence``, in the dtype of
        ``q``'s output.
    """
    inputs = tf.cast(x_obs, dtype=tf.float32)
    if inputs.shape.rank == 1:
        # The network in this file is a stack of Dense layers, which need
        # (batch, features) input; promote a flat vector to one feature column.
        inputs = tf.expand_dims(inputs, axis=-1)
    z = q(inputs)

    # Log density of the samples under the true distribution. `logpdf` avoids
    # the underflow of log(pdf(x)). SciPy returns float64, so cast to the
    # network's dtype: TensorFlow does not auto-promote mixed float widths and
    # the subtraction below would otherwise raise.
    # NOTE(review): this term does not depend on `q`, so it contributes no
    # gradient -- confirm that is intended.
    log_likelihood = tf.cast(
        tf.reduce_sum(true_dist.logpdf(x_obs)), dtype=z.dtype
    )

    # NOTE(review): kept as written, but this is a sum of log-ratios of the two
    # raw network outputs, not a KL divergence between distributions. The
    # outputs are unconstrained, so any non-positive value yields NaN/-inf here.
    kl_divergence = tf.reduce_sum(tf.math.log(z[:, 1]) - tf.math.log(z[:, 0]))

    return log_likelihood - kl_divergence

# Train with Adam (learning rate 0.01) for 1000 steps, minimising the negated
# objective and reporting progress every 100th step.
optimizer = keras.optimizers.Adam(learning_rate=0.01)
for step in range(1000):
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        loss = -elbo(x_obs, q)

    # Variables are read after the forward pass, once the model has been called.
    gradients = tape.gradient(loss, q.trainable_variables)
    optimizer.apply_gradients(zip(gradients, q.trainable_variables))

    if step % 100:
        continue
    print(f'Step {step}: ELBO = {-loss:.2f}')

# Plot the true density next to the network's exponentiated second output
x_grid = np.linspace(0, 1, 1000)

# The network is a stack of Dense layers, which need (batch, features) input,
# so evaluate it on the grid as a single-feature column, not a flat vector.
q_grid = q(tf.cast(x_grid[:, np.newaxis], dtype=tf.float32))

plt.plot(x_grid, true_dist.pdf(x_grid), label='True distribution')
# NOTE(review): exp(output[:, 1]) is plotted as the "variational distribution",
# but nothing normalises it to integrate to 1, and the objective takes the log
# of this same column rather than treating it as a log-density -- confirm the
# intended interpretation.
plt.plot(x_grid, np.exp(q_grid[:, 1]), label='Variational distribution')
plt.legend()
plt.show()


# Cell output: ModuleNotFoundError: No module named 'tensorflow'