In [None]:
import tensorflow as tf

In [None]:
# Turn on eager execution so operations run immediately and return concrete
# values; later cells call `.numpy()` on results, which needs this.
tf.enable_eager_execution()

In [None]:
# Report the TensorFlow version this notebook is running against.
print('TensorFlow version: {}'.format(tf.__version__))

In [None]:
# Short alias for the Keras layers namespace used by the cells below.
layers = tf.keras.layers

In [None]:
class MySimpleRNN(layers.Layer):
  """Minimal fully-connected recurrent layer written from scratch.

  Starting from an all-zero state, every time step ``t`` updates the state as
  ``h_t = tanh(x_t @ kernel + h_{t-1} @ recurrent_kernel + bias)``.

  Args:
    output_dim: Size of the hidden state (and of every output vector).
    return_sequences: If True, `call` returns the state of every time step
      stacked along axis 1; otherwise only the final state is returned.
    **kwargs: Forwarded unchanged to the base `Layer` constructor.
  """

  def __init__(self, output_dim, return_sequences=False, **kwargs):
    super(MySimpleRNN, self).__init__(**kwargs)
    self.output_dim = output_dim
    self.return_sequences = return_sequences

  def build(self, input_shape):
    """Creates the input kernel, the recurrent kernel and the bias.

    Args:
      input_shape: Shape of the layer input; only its last entry (the feature
        size) is used to size the input kernel.
    """
    kernel_shape = tf.TensorShape((input_shape[-1], self.output_dim))
    recurrent_shape = tf.TensorShape((self.output_dim, self.output_dim))
    # Projects the features of one time step into the state space.
    self.kernel = self.add_weight(name='kernel',
                                  shape=kernel_shape,
                                  initializer='uniform',
                                  trainable=True)
    # Carries the previous state into the next one.
    self.recurrent_kernel = self.add_weight(name='recurrent_kernel',
                                            shape=recurrent_shape,
                                            initializer='uniform',
                                            trainable=True)
    # Pass the bias shape as an explicit 1-tuple rather than a bare integer.
    self.bias = self.add_weight(name='bias',
                                shape=(self.output_dim,),
                                initializer='zeros',
                                trainable=True)
    # Make sure to call the `build` method at the end
    super(MySimpleRNN, self).build(input_shape)

  def call(self, inputs):
    """Runs the recurrence over axis 1 of `inputs`.

    Args:
      inputs: Rank-3 tensor indexed as ``inputs[:, t, :]``; axis 0 is the
        batch, axis 1 the time step and axis 2 the features.

    Returns:
      The final state with shape ``[batch, output_dim]``, or, when
      `return_sequences` is True, all states stacked into
      ``[batch, seq_length, output_dim]``.

    Raises:
      ValueError: If the length of axis 1 is not statically known, because
        the recurrence is unrolled with a Python loop.
    """
    seq_length = inputs.get_shape().as_list()[1]
    if seq_length is None:
      raise ValueError('MySimpleRNN needs a statically known sequence length '
                       '(axis 1 of the input).')
    # Read the batch size at run time so an unknown static batch dimension
    # does not break the creation of the initial state.
    batch = tf.shape(inputs)[0]
    h = tf.zeros([batch, self.output_dim])
    h_list = []
    for i in range(seq_length):
      h = tf.math.tanh(tf.matmul(inputs[:, i, :], self.kernel) +
                       tf.matmul(h, self.recurrent_kernel) + self.bias)
      if self.return_sequences:
        h_list.append(h)

    if self.return_sequences:
      # Stack with a TensorFlow op (time becomes axis 1) instead of going
      # through NumPy: the result stays a tensor connected to the weights, so
      # gradients can flow back through this layer.
      return tf.stack(h_list, axis=1)
    return h

In [None]:
# Start from an empty layer stack; layers are appended in the following cell.
model = tf.keras.models.Sequential()

In [None]:
# Commented-out alternatives that use the built-in Keras recurrent layers:
#model.add(layers.SimpleRNN(units=3, return_sequences=True))
#model.add(layers.LSTM(2))
#model.add(layers.GRU(4, return_sequences=True))
#model.add(layers.LSTM(8, return_sequences=True))
#model.add(layers.GRU(16))

# Two hand-written recurrent layers followed by a one-unit dense head. The
# first recurrent layer returns its state at every time step so the second
# one still receives a sequence; the second returns only its final state.
model.add(MySimpleRNN(output_dim=3, return_sequences=True))
model.add(MySimpleRNN(output_dim=3))
model.add(layers.Dense(units=1))

$$h_t = \tanh (W h_{t-1} + U x_t + b)$$
$$h_t = \mathrm{ReLU} (U x_t)$$


In [None]:
# Fix the global random seed before anything random happens in this cell, so
# the sampled inputs (and the weights, which are created on the model's first
# call) are the same on every Restart & Run All.
tf.set_random_seed(42)

# A random input of shape [2, 4, 2]; the recurrent layers iterate over axis 1.
inputs = tf.random_normal([2, 4, 2])

# The first call builds the layers' weights and returns the model output.
outputs = model(inputs)

In [None]:
# Display the shape of the model output computed above.
outputs.shape

In [None]:
# Display the output values as a NumPy array (available under eager execution).
outputs.numpy()

In [None]:
# Display the model's trainable weights.
model.trainable_variables

In [None]:
# Adam optimizer constructed with no arguments, i.e. its default settings.
opt = tf.train.AdamOptimizer()

In [None]:
# Run ten optimisation steps. Each step samples a fresh random sequence of
# shape [1, 4, 2] and pushes the model output towards a target of all ones.
for step in range(10):
  # The inputs are constants, so they do not need to be created under the tape.
  inputs = tf.random_normal([1, 4, 2])
  with tf.GradientTape() as tape:
    outputs = model(inputs)
    loss = tf.losses.mean_squared_error(labels=tf.ones(outputs.get_shape()),
                                        predictions=outputs)
  # Differentiate and update only the trainable weights: a non-trainable
  # variable is not meant to be changed by the optimizer and would yield a
  # `None` gradient.
  train_vars = model.trainable_variables
  grad = tape.gradient(loss, train_vars)
  opt.apply_gradients(zip(grad, train_vars))

