# My DAI Agent

In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import keras
from keras import layers
import numpy as np
import matplotlib.pyplot as plt

# Convolutional VAE

In [2]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

In [3]:
latent_dim = 2

encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()

Metal device set to: Apple M1 Pro
Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 conv2d (Conv2D)                (None, 14, 14, 32)   320         ['input_1[0][0]']                
                                                                                                  
 conv2d_1 (Conv2D)              (None, 7, 7, 64)     18496       ['conv2d[0][0]']                 
                                                                                                  
 flatten (Flatten)              (None, 3136)         0           ['conv2d_1[0][0]']               
                                                          

2022-07-05 07:38:40.520499: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-05 07:38:40.520624: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [4]:
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(7 * 7 * 64, activation="relu")(latent_inputs)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 2)]               0         
                                                                 
 dense_1 (Dense)             (None, 3136)              9408      
                                                                 
 reshape (Reshape)           (None, 7, 7, 64)          0         
                                                                 
 conv2d_transpose (Conv2DTra  (None, 14, 14, 64)       36928     
 nspose)                                                         
                                                                 
 conv2d_transpose_1 (Conv2DT  (None, 28, 28, 32)       18464     
 ranspose)                                                       
                                                                 
 conv2d_transpose_2 (Conv2DT  (None, 28, 28, 1)        289 

In [5]:
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction), axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [6]:
# (x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
# mnist_digits = np.concatenate([x_train, x_test], axis=0)
# mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
#
# vae = VAE(encoder, decoder)
# vae.compile(optimizer=tf.keras.optimizers.Adam())
# vae.fit(mnist_digits, epochs=30, batch_size=128)

## Non convolutional

In [7]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon


latent_dim = 2

encoder_inputs = keras.Input(shape=(28*28))
x = layers.Dense(16, activation="relu")(encoder_inputs)
x = layers.Dense(16, activation="relu")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()


latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(16, activation="relu")(latent_inputs)
x = layers.Dense(16, activation="relu")(x)
decoder_outputs = layers.Dense(28*28, activation="sigmoid")(x)
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()



class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # reconstruction_loss = tf.reduce_mean(keras.losses.binary_crossentropy(data, reconstruction), axis=0)
            reconstruction_loss = keras.losses.binary_crossentropy(data, reconstruction)
            reconstruction_loss = reconstruction_loss * data.shape[1]  # Think I need this to balance out for the data set. It makes it work but I don't know why

            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            # kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            kl_loss = tf.reduce_sum(kl_loss, axis=1)
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 784)]        0           []                               
                                                                                                  
 dense_2 (Dense)                (None, 16)           12560       ['input_3[0][0]']                
                                                                                                  
 dense_3 (Dense)                (None, 16)           272         ['dense_2[0][0]']                
                                                                                                  
 z_mean (Dense)                 (None, 2)            34          ['dense_3[0][0]']                
                                                                                            

In [8]:
# (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
#
# x_train = x_train.reshape((60000, 28*28))
# x_test = x_test.reshape((10000, 28*28))
#
# mnist_digits = np.concatenate([x_train, x_test], axis=0)
# mnist_digits = mnist_digits.astype("float32") / 255
#
#
# vae = VAE(encoder, decoder)
# vae.compile(optimizer=tf.keras.optimizers.Adam())
# vae.fit(mnist_digits, epochs=30, batch_size=128)

In [9]:
def plot_label_clusters(z_mean, labels):
    # display a 2D plot of the digit classes in the latent space
    plt.figure(figsize=(12, 10))
    plt.scatter(z_mean[:, 0], z_mean[:, 1], c=labels)
    plt.colorbar()
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.show()

In [10]:
latent_reps = vae.encoder.predict(mnist_digits)
latent_reps

NameError: name 'vae' is not defined

In [None]:
latent_reps = vae.encoder(mnist_digits)
latent_reps

In [11]:
labels = np.concatenate([y_train, y_test], axis=0)
plot_label_clusters(latent_reps[0], labels)

NameError: name 'y_train' is not defined

In [12]:
latent_reps[0].shape

NameError: name 'latent_reps' is not defined

In [13]:
plt.scatter(latent_reps[0][0][0], latent_reps[0][0][1])

NameError: name 'latent_reps' is not defined

# Transition Model

Because the recurrent states are deterministic, the variance of the predicted latent distributions does not include prediction errors, which could be an improvement for future work.

In [14]:
latent_dim = 2
action_dim = 1

hidden_dim = 64

planning_window = 12

transition_inputs = layers.Input((28, 28))
h = layers.GRU(hidden_dim, input_shape=(28, 28))(transition_inputs)
transition_outputs = layers.Dense(10)(h)

transition_model = keras.Model(transition_inputs, transition_outputs)
transition_model.summary()

# h = layers.GRU(hidden_dim, input_shape=latent_dim+action_dim, activation="relu", return_sequences=True, return_state=True)

# transition = keras.Model(h)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 28, 28)]          0         
                                                                 
 gru (GRU)                   (None, 64)                18048     
                                                                 
 dense_7 (Dense)             (None, 10)                650       
                                                                 
Total params: 18,698
Trainable params: 18,698
Non-trainable params: 0
_________________________________________________________________


In [15]:
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

x_train = x_train.astype("float32")/255
x_test = x_test.astype("float32")/255

transition = keras.Sequential()
transition.add(layers.GRU(hidden_dim, input_shape=(28, 28)))
transition.add(layers.Dense(10))

transition.compile(optimizer=tf.keras.optimizers.Adam(),
                   loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                   metrics=tf.keras.metrics.SparseCategoricalAccuracy())

transition.fit(x_train, y_train, epochs=30, batch_size=128)

Epoch 1/30


2022-07-05 07:38:43.247374: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-07-05 07:38:43.630035: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-05 07:38:43.742042: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-05 07:38:44.599795: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


 92/469 [====>.........................] - ETA: 5s - loss: 1.8778 - sparse_categorical_accuracy: 0.3325


KeyboardInterrupt



In [None]:
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

x_train = x_train.astype("float32")/255
x_test = x_test.astype("float32")/255

transition_inputs = layers.Input((28, 28))
h, res = layers.GRU(hidden_dim, return_sequences=True, return_state=True)(transition_inputs)
transition_outputs = layers.Dense(10)(res)

transition_model = keras.Model(transition_inputs, transition_outputs)

transition_model.compile(optimizer=tf.keras.optimizers.Adam(),
                   loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                   metrics=tf.keras.metrics.SparseCategoricalAccuracy())

transition_model.fit(x_train, y_train, epochs=30, batch_size=128)
transition_model.evaluate(x_test, y_test)

In [None]:
transition_model.fit(x_train[0:1], y_train[0:1])
transition_model.evaluate(x_test[0:1], y_test[0:1])

The transition model should take a sequence of length H of latent state action pairs and then be able to predict the next latent state in the sequence.
It should be trained by comparing the predicted latent state to the actual latent state.

Batches should be constructed by predicting an action

In [16]:
class TransitionGRU(keras.Model):

    def __init__(self, latent_dim: int, policy_dim: int, dynamic_dim: int, num_rnn_layers: int):
        super(TransitionGRU, self).__init__(**kwargs)
        # Hyperparameters
        self.latent_dim = latent_dim
        self.policy_dim = policy_dim
        self.dynamic_dim = dynamic_dim
        self.num_rnn_layers = num_rnn_layers

        # Neural networks:
        self.gru = layers.GRU(input_size=latent_dim + policy_dim, hidden_size=dynamic_dim, num_layers=num_rnn_layers,)
        self.mean_net = layers.Dense(dynamic_dim, latent_dim)
        self.std_net = layers.Dense(dynamic_dim, latent_dim)

## A Complete Simple Agent

EFE and FEEF are only used for policy selection

### Training
Inputs: a time series of observations and actions
Objective: Improve the perception model through the VFE equation

#### Perception
Learn to reconstruct observations from latent states

#### Transition
Learn to predict next latent state from previous 10 states and actions

### Selecting Actions
Inputs: a time series of observations and actions
No learning to be done here. Just sample actions and select a policy

In [17]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon


latent_dim = 2

def create_encoder():

    encoder_inputs = keras.Input(shape=(28*28))
    x = layers.Dense(16, activation="relu")(encoder_inputs)
    x = layers.Dense(16, activation="relu")(x)
    z_mean = layers.Dense(latent_dim, name="z_mean")(x)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
    z = Sampling()([z_mean, z_log_var])
    encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
    return encoder



def create_decoder():

    latent_inputs = keras.Input(shape=(latent_dim,))
    x = layers.Dense(16, activation="relu")(latent_inputs)
    x = layers.Dense(16, activation="relu")(x)
    decoder_outputs = layers.Dense(28*28, activation="sigmoid")(x)
    decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
    return decoder


def create_transition(latent_dim, action_dim):

    # transition_inputs = layers.Input((28, 28))
    # h, res = layers.GRU(hidden_dim, return_sequences=True, return_state=True)(transition_inputs)
    # transition_outputs = layers.Dense(10)(res)
    #
    # transition_model = keras.Model(transition_inputs, transition_outputs)

    transition_inputs = layers.Input(latent_dim + action_dim)
    h = layers.Dense(16)(transition_inputs)
    z_mean = layers.Dense(latent_dim, name="z_mean")(h)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(h)
    transition_model = keras.Model(transition_inputs, [z_mean, z_log_var], name="transition")

    return transition_model


class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # reconstruction_loss = tf.reduce_mean(keras.losses.binary_crossentropy(data, reconstruction), axis=0)
            reconstruction_loss = keras.losses.binary_crossentropy(data, reconstruction)
            reconstruction_loss = reconstruction_loss * data.shape[1]  # Think I need this to balance out for the data set. It makes it work but I don't know why

            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            # kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            kl_loss = tf.reduce_sum(kl_loss, axis=1)
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [18]:
enc = create_encoder()
dec = create_decoder()
tran = create_transition(2, 1)

model_vae = VAE(enc, dec)
model_vae.compile(optimizer=tf.keras.optimizers.Adam())

In [19]:
class DAIFAgent(keras.Model):

    def __init__(self, prior, enc, dec, tran, planning_horizon, n_policy_candidates):

        super(DAIFAgent, self).__init__()

        self.prior = prior
        self.planning_horizon = planning_horizon
        self.policy_iterations = n_policy_candidates

        # encoder
        self.enc = enc

        # decoder
        # takes latent state and outputs observation
        self.dec = dec

        # full
        self.model_vae = VAE(enc, dec)
        self.model_vae.compile(optimizer=tf.keras.optimizers.Adam())

        # transition
        # takes action plus last state and outputs next state
        self.tran = tran


    def select_action(self, observation):

        best_policy = None
        min_FEEF = np.inf
        for _ in range(self.n_policy_candidates):

            # try a policy
            policy, policy_posteriors, policy_vars, next_likelihoods, next_posteriors, next_posteriors_vars = self.sample_policy_and_trajectory(observation)

            # evaluate the FEEF
            FEEF = self.evaluate_policy(policy_posteriors, policy_vars, next_likelihoods, next_posteriors, next_posteriors_vars)

            if FEEF < min_FEEF:
                best_policy = policy

        return best_policy


    def train_vae(self, observation):
        model_vae.fit(observation)


    def train_transition(self, z_t, z_t_minus_one, a_t_minus_one):
        tran_inputs = z_t_minus_one + a_t_minus_one
        self.tran.fit(tran_inputs, z_t)


    def sample_policy_and_trajectory(self, z_t_minus_one):
        """
        sample an action
        :param z_t_minus_one:
        :return:
        """

        # generate a random policy
        policy = np.random.random(self.planning_horizon)

        prev_latent = z_t_minus_one
        policy_posteriors = []
        policy_vars = []

        # find the predicted latent states from the transition model
        for action in policy:
            tran_input = prev_latent + action
            next_latent_mean, next_latent_log_var = self.tran.predict(tran_input)

            policy_posteriors.append(next_latent_mean)
            policy_vars.append(next_latent_log_var)

            prev_latent = next_latent_mean

        # find the predicted observations for each latent based on the likelihood model
        next_likelihoods = self.dec(policy_posteriors)

        # find the predicted latent for each of these observations based on the encoder
        next_posteriors, next_posteriors_vars, next_posteriors_z = self.enc(next_likelihoods)

        return policy, policy_posteriors, policy_vars, next_likelihoods, next_posteriors, next_posteriors_vars


    def evaluate_policy(self, policy_posteriors, policy_vars, predicted_likelihood, predicted_posterior, predicted_posterior_vars):

        return FEEF(policy_posteriors, policy_vars, predicted_likelihood, predicted_posterior, predicted_posterior_vars)


    def FEEF(self, policy_posteriors, policy_vars, predicted_likelihood, predicted_posterior, predicted_posterior_vars):
        """
        Compute the FEEF for policy selection
        :param policy_posteriors:
        :param predicted_likelihood:
        :param predicted_posterior:
        :return:
        """

        # !!!! evaluate the EXTRINSIC KL divergence !!!!

        # convert to normal distributions
        # TODO Why is the stddev 1s here? I think because we assume it is.
        likelihood_dist = tfp.distributions.MultivariateNormalDiag(loc=predicted_likelihood, scale_diag=np.ones_like(predicted_likelihood))

        #TODO double check this to make sure it's evaluating correctly with the sum part
        #TODO do the prior model
        kl_extrinsic = tfp.distributions.kl_divergence(likelihood_dist, self.prior_model).sum(dim=-1)  # Sum over components, keep time-steps and batches

        # !!!! evaluate the KL INTRINSIC part !!!!

        policy_posteriors_dist = tfp.distributions.MultivariateNormalDiag(loc=policy_posteriors, scale_diag=policy_vars)
        predicted_posterior_dist = tfp.distributions.MultivariateNormalDiag(loc=predicted_posterior, scale_diag=predicted_posterior_vars)

        # TODO fix what the sum is doing here to make sure the dimension is correct
        kl_intrinsic = tfp.distributions.kl_divergence(policy_posteriors_dist, predicted_posterior_dist).sum(dim=2)  # Sum over components, keep

        return kl_extrinsic - kl_intrinsic


    def EFE(self, policy_posteriors, predicted_likelihood, predicted_posterior):
        """
        Compute the EFE for policy selection
        :param policy_posteriors:
        :param predicted_likelihood:
        :param predicted_posterior:
        :return:
        """
        pass


In [20]:
np.arange(10).reshape((5,2)).astype("float")

array([[0., 1.],
       [2., 3.],
       [4., 5.],
       [6., 7.],
       [8., 9.]])

In [21]:
t1 = tf.convert_to_tensor(np.arange(10).reshape((5,2)).astype("float"))
t2 = tf.convert_to_tensor(np.arange(10).reshape(5,2).astype("float") + 1)
sigma1 = tf.convert_to_tensor(np.ones((5, 2)))
sigma2 = tf.convert_to_tensor(np.ones((5, 2)))

dist1 = tfp.distributions.MultivariateNormalDiag(loc=t1, scale_diag=sigma1)
dist2 = tfp.distributions.MultivariateNormalDiag(loc=t2, scale_diag=sigma2)

print(dist1.mean())
print(dist2.mean())
#
#
tfp.distributions.kl_divergence(dist1, dist2)

tf.Tensor(
[[0. 1.]
 [2. 3.]
 [4. 5.]
 [6. 7.]
 [8. 9.]], shape=(5, 2), dtype=float64)
tf.Tensor(
[[ 1.  2.]
 [ 3.  4.]
 [ 5.  6.]
 [ 7.  8.]
 [ 9. 10.]], shape=(5, 2), dtype=float64)


<tf.Tensor: shape=(5,), dtype=float64, numpy=array([1., 1., 1., 1., 1.])>

In [24]:
np.ones_like(t1)

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])

In [36]:
np.ones_like(np.zeros((3,3)))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [72]:
enc = create_encoder()
dec = create_decoder()
tran = create_transition(2, 1)

model_vae = VAE(enc, dec)
model_vae.compile(optimizer=tf.keras.optimizers.Adam())

agent = DAIFAgent("prior", enc, dec, tran, 10, 100)

## Test environment

In [37]:
import gym

In [55]:
env = gym.make('MountainCar-v0')
env.action_space.seed(42)

observation, info = env.reset()
print(observation, info)

for _ in range(1000):

    observation, reward, done, info = env.step()

    print(observation, reward, done, info)

    if done:
        observation, info = env.reset(return_info=True)

env.close()

-0.433257 0.0


NameError: name 'agent' is not defined