In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import keras
from keras import layers
import numpy as np
import matplotlib.pyplot as plt
import gym

In [2]:
from vae import VAE, create_decoder, create_encoder
from transition import TransitionModel
from agent import DAIFAgent

In [3]:
from util import random_observation_sequence, transform_observations

In [4]:
enc = create_encoder(2, 2, [20])
dec = create_decoder(2, 2, [20])
tran = TransitionModel(2, 1)

env = gym.make('MountainCarContinuous-v0')

Metal device set to: Apple M1 Pro


2022-07-12 05:24:29.109269: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-12 05:24:29.109580: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [30]:
import tensorflow as tf
import tensorflow_probability as tfp
import keras
from keras import layers
import numpy as np


class TransitionGRU(keras.Model):

    def __init__(self, latent_dim, action_dim, seq_length, hidden_units, output_dim, stateful=True, **kwargs):
        super(TransitionGRU, self).__init__(**kwargs)


        self.latent_dim = latent_dim
        self.action_dim = action_dim
        self.seq_length = seq_length
        self.hidden_units = hidden_units
        self.output_dim = output_dim


        inputs = layers.Input(shape=(None, self.latent_dim + self.action_dim))
        h_states, final_state = layers.GRU(hidden_units, activation="tanh", return_sequences=True, return_state=True, stateful=stateful, name="gru")(inputs)

        # TODO is this correctly getting the last hidden state or the first???
        z_mean = layers.Dense(latent_dim, name="z_mean")(final_state)  # all batch last time step all dimension
        z_log_sd = layers.Dense(latent_dim, name="z_log_sd")(final_state)
        z_stddev = tf.exp(z_log_sd)

        self.transition_model = keras.Model(inputs, [z_mean, z_stddev, final_state, h_states], name="transition")

        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    def call(self, inputs, training=None, mask=None):
        return self.transition_model(inputs)


    @property
    def metrics(self):
        return [self.kl_loss_tracker]

    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data
        mu, stddev = y

        with tf.GradientTape() as tape:
            z_mean, z_stddev, final_state, h_states = self.transition_model(x, training=True)  # Forward pass

            # Compute the loss value
            pred_dist = tfp.distributions.MultivariateNormalDiag(loc=z_mean, scale_diag=z_stddev)
            true_dist = tfp.distributions.MultivariateNormalDiag(loc=mu, scale_diag=stddev)

            # TODO make sure this is the correct order of terms
            kl_loss = tfp.distributions.kl_divergence(pred_dist, true_dist)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(kl_loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "kl_loss": self.kl_loss_tracker.result()
        }


In [31]:
num_seqs = 1000
seq_length = 5
ob_seqs = []
next_obs = []

for i in range(num_seqs):
    o, a, r = random_observation_sequence(env, seq_length)

    train = np.concatenate([o[:-1], a], axis=1)
    test = o[-1]

    ob_seqs.append(train)
    next_obs.append(test)

ob_seqs = np.array(ob_seqs)
next_obs = np.array(next_obs)
ob_seqs.shape

ob_seqs_stddev = np.ones_like(ob_seqs)
next_obs_stddev = np.ones_like(next_obs)

ob_seqs.shape

(1000, 5, 3)

In [32]:
next_obs.shape

(1000, 2)

In [34]:
m = TransitionGRU(2, 1, 10, 30, 2, stateful=False)

m.compile(optimizer="Adam")
m.build((None, None, 3))
m.summary()

Model: "transition_gru_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 transition (Functional)     [(None, 2),               3274      
                              (None, 2),                         
                              (None, 30),                        
                              (None, None, 30)]                  
                                                                 
Total params: 3,276
Trainable params: 3,274
Non-trainable params: 2
_________________________________________________________________


In [29]:
m.transition_model.layers[1].reset_states()

IndexError: list index out of range

In [26]:
m.fit(ob_seqs, (next_obs, next_obs_stddev), batch_size=20, epochs=30)

Epoch 1/30


2022-07-12 05:42:06.299927: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-12 05:42:06.428940: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-12 05:42:06.484667: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x17bb87370>

In [338]:
res = m(ob_seqs[0:10])
res[0]

<tf.Tensor: shape=(10, 2), dtype=float32, numpy=
array([[-5.8090693e-01,  8.7059429e-04],
       [-5.2863246e-01,  3.1522992e-03],
       [-4.3379432e-01, -8.7528732e-03],
       [-4.3787414e-01, -5.1598563e-03],
       [-5.8681363e-01,  2.7444272e-04],
       [-4.9155015e-01, -2.5757030e-03],
       [-5.0213182e-01, -1.5339281e-03],
       [-5.7871789e-01,  3.1496743e-03],
       [-5.5853301e-01,  3.3286274e-03],
       [-4.2998409e-01, -4.8398916e-03]], dtype=float32)>

In [339]:
next_obs[0:10]

array([[-5.81220269e-01,  5.59085340e-04],
       [-5.32187581e-01,  2.53276480e-03],
       [-4.32790160e-01, -7.78845651e-03],
       [-4.35192347e-01, -4.59486013e-03],
       [-5.87698460e-01, -8.86931142e-04],
       [-4.92360324e-01, -1.38492603e-03],
       [-5.02152085e-01, -4.14562441e-04],
       [-5.78702986e-01,  2.86695943e-03],
       [-5.59793115e-01,  3.54842329e-03],
       [-4.26397473e-01, -4.90277866e-03]])

In [340]:
res[2]

<tf.Tensor: shape=(10, 30), dtype=float32, numpy=
array([[ 1.76516443e-01,  1.54475287e-01, -5.12306057e-02,
         1.64845765e-01,  2.68837273e-01, -9.95857716e-02,
         7.79657215e-02, -1.10403851e-01, -1.08879849e-01,
        -1.22801609e-01,  3.41023579e-02, -8.55413154e-02,
         1.35252342e-01, -5.53389899e-02, -2.05101758e-01,
         9.60189849e-02, -2.62633413e-01,  2.84929663e-01,
         2.07988352e-01, -1.35656670e-01,  1.67700097e-01,
        -9.03860945e-03,  9.67715830e-02,  1.41193375e-01,
        -1.73646688e-01, -1.49096355e-01,  1.63787216e-01,
         1.34333670e-01, -1.35705829e-01, -1.85656726e-01],
       [-4.13602814e-02,  1.25444561e-01,  1.13072321e-01,
         5.97583316e-02,  1.48286954e-01, -1.09492682e-01,
        -6.71713203e-02, -1.93654671e-01, -2.18552351e-01,
        -1.57539248e-01,  1.89289406e-01, -2.24954244e-02,
         1.34446856e-03, -4.91290167e-03, -2.39228070e-01,
         1.89485967e-01, -3.58797014e-01,  2.12003320e-01,
     

In [341]:
res[3]

<tf.Tensor: shape=(10, 5, 30), dtype=float32, numpy=
array([[[ 0.08676665,  0.0665729 , -0.01751424, ...,  0.04447326,
         -0.01017037, -0.07514461],
        [ 0.04867286,  0.10080533,  0.05040885, ...,  0.05720273,
         -0.10691113, -0.10694263],
        [ 0.15783645,  0.12385118, -0.07167391, ...,  0.10117968,
         -0.06072072, -0.15788542],
        [ 0.20955351,  0.14058515, -0.09838694, ...,  0.12855455,
         -0.07163367, -0.1843247 ],
        [ 0.17651644,  0.15447529, -0.05123061, ...,  0.13433367,
         -0.13570583, -0.18565673]],

       [[ 0.15751047,  0.06191986, -0.11045828, ...,  0.06034369,
          0.04968761, -0.08267669],
        [ 0.08696802,  0.09508388,  0.00635655, ...,  0.06304765,
         -0.08224013, -0.10179211],
        [-0.00656156,  0.11269982,  0.09019252, ...,  0.05995458,
         -0.1843041 , -0.10718634],
        [-0.00765619,  0.12198227,  0.07892087, ...,  0.07446641,
         -0.1967938 , -0.12551175],
        [-0.04136028,  0.12

In [255]:
# def gru(input_dim, seq_length, hidden_units, output_dim):
#
#     inputs = layers.Input(shape=(None, input_dim), batch_size=10)
#     out_states, h_states, *everything_else = layers.GRU(hidden_units, activation="tanh", stateful=True, return_sequences=True)(inputs)
#     h = layers.Dense(output_dim)(out_states)
#
#     model = keras.Model(inputs, h)
#
#     return model, h_states


input_dim = 3
hidden_units = 30
seq_length = 10
output_dim = 2


inputs = layers.Input(shape=(None, input_dim))
out_states = layers.GRU(2, activation="tanh", stateful=False, return_sequences=True)(inputs)
h = layers.Dense(output_dim)(out_states)

m = keras.Model(inputs, [h, out_states])

inputs2 = layers.Input(shape=(None, input_dim), batch_size=20)
out_states2 = layers.GRU(2, activation="tanh", stateful=False, return_sequences=False)(inputs2)
h2 = layers.Dense(output_dim)(out_states2)

m2 = keras.Model(inputs2, h2)

In [256]:
# m, h_states = gru(3, 10, 30, 2)

m.compile(optimizer="Adam", loss=tf.keras.losses.MeanSquaredError())
m.summary()

Model: "model_47"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_86 (InputLayer)       [(None, None, 3)]         0         
                                                                 
 gru_77 (GRU)                (None, None, 2)           42        
                                                                 
 dense_37 (Dense)            (None, None, 2)           6         
                                                                 
Total params: 48
Trainable params: 48
Non-trainable params: 0
_________________________________________________________________


In [237]:
m2.compile(optimizer="Adam", loss=tf.keras.losses.MeanSquaredError())
m2.summary()

Model: "model_40"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_77 (InputLayer)       [(20, None, 3)]           0         
                                                                 
 gru_68 (GRU)                (20, 2)                   42        
                                                                 
 dense_30 (Dense)            (20, 2)                   6         
                                                                 
Total params: 48
Trainable params: 48
Non-trainable params: 0
_________________________________________________________________


In [257]:
m.fit(ob_seqs, next_obs, batch_size=20, epochs=10)

Epoch 1/10


ValueError: in user code:

    File "/Users/Ethan/miniconda3/envs/tf_daif/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "/Users/Ethan/miniconda3/envs/tf_daif/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/Ethan/miniconda3/envs/tf_daif/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "/Users/Ethan/miniconda3/envs/tf_daif/lib/python3.8/site-packages/keras/engine/training.py", line 860, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/Users/Ethan/miniconda3/envs/tf_daif/lib/python3.8/site-packages/keras/engine/training.py", line 918, in compute_loss
        return self.compiled_loss(
    File "/Users/Ethan/miniconda3/envs/tf_daif/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/Users/Ethan/miniconda3/envs/tf_daif/lib/python3.8/site-packages/keras/losses.py", line 141, in __call__
        losses = call_fn(y_true, y_pred)
    File "/Users/Ethan/miniconda3/envs/tf_daif/lib/python3.8/site-packages/keras/losses.py", line 245, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/Ethan/miniconda3/envs/tf_daif/lib/python3.8/site-packages/keras/losses.py", line 1329, in mean_squared_error
        return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)

    ValueError: Dimensions must be equal, but are 10 and 20 for '{{node mean_squared_error/SquaredDifference}} = SquaredDifference[T=DT_FLOAT](model_47/dense_37/BiasAdd, IteratorGetNext:1)' with input shapes: [20,10,2], [20,2].


In [247]:
res = m(ob_seqs[0:20])

In [248]:
res[0]

<tf.Tensor: shape=(20, 10, 2), dtype=float32, numpy=
array([[[-0.03126741, -0.05007739],
        [-0.02838155, -0.00308725],
        [-0.03572483, -0.00703882],
        [-0.12308189, -0.33903465],
        [-0.09099328, -0.21175347],
        [-0.15656376, -0.48039535],
        [-0.17280677, -0.56211597],
        [-0.09679789, -0.25665036],
        [-0.09596224, -0.23660131],
        [-0.07447534, -0.14166676]],

       [[-0.09012927, -0.25503004],
        [-0.06786623, -0.12630445],
        [-0.06517889, -0.0879531 ],
        [-0.0753606 , -0.10781813],
        [-0.15388907, -0.41995582],
        [-0.16208252, -0.4539302 ],
        [-0.09656087, -0.1943801 ],
        [-0.12658402, -0.30111074],
        [-0.09610697, -0.17922345],
        [-0.1653356 , -0.4642073 ]],

       [[ 0.00175663,  0.08846912],
        [-0.10092825, -0.24563871],
        [-0.12627089, -0.31458697],
        [-0.09916592, -0.19435771],
        [-0.07646907, -0.09792791],
        [-0.09629729, -0.16202733],
       

In [223]:
res[1]

<tf.Tensor: shape=(10, 10, 2), dtype=float32, numpy=
array([[[-0.36397788,  0.0294749 ],
        [-0.48029938,  0.01573997],
        [-0.51549274, -0.00554507],
        [-0.50096333, -0.02819019],
        [-0.51849884, -0.03603037],
        [-0.4956515 , -0.04518281],
        [-0.4919299 , -0.04698842],
        [-0.5167043 , -0.04562606],
        [-0.5176949 , -0.04506196],
        [-0.5242841 , -0.04514759]],

       [[-0.3648733 ,  0.04206948],
        [-0.4807293 ,  0.04454088],
        [-0.51710445,  0.03400522],
        [-0.5260778 ,  0.02312428],
        [-0.5013063 ,  0.01332644],
        [-0.50671595,  0.01171992],
        [-0.52894664,  0.00859705],
        [-0.5185139 ,  0.00654676],
        [-0.52903926,  0.00418232],
        [-0.50056595,  0.00295787]],

       [[-0.35582137,  0.06193205],
        [-0.4635372 ,  0.05734254],
        [-0.50165814,  0.04938255],
        [-0.52453876,  0.04110029],
        [-0.5341751 ,  0.03280196],
        [-0.5287185 ,  0.02695066],
       