In [None]:
import sys

import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

In [None]:
# Record the interpreter and TensorFlow versions this notebook was run with.
version_banner = f'''
    Python {sys.version}
TensorFlow {tf.__version__}
'''
print(version_banner)

# Basics

In [None]:
def memory_sum(seq_len, weight=1, bias=1, h_weight=1, mem_len=0):
    """Generate a sequence in which each value depends on earlier values.

    The sequence starts at 1. Every following value is
    ``last * weight + bias + memory``, where ``memory`` is a sum over
    values that precede the last one:

    * ``mem_len == 0`` -- no memory term (0).
    * ``mem_len > 0``  -- sum of up to ``mem_len`` values immediately
      before the last value.
    * ``mem_len < 0``  -- sum of every value before the last value.

    Args:
        seq_len: Number of values to return.
        weight: Multiplier applied to the last value.
        bias: Constant added at every step.
        h_weight: Accepted but not used in the computation.
        mem_len: Size of the memory window (see above).

    Returns:
        A NumPy array holding the first ``seq_len`` values.
    """
    values = [1]
    for _ in range(seq_len - 1):
        if mem_len == 0:
            memory = 0
        elif mem_len > 0:
            # Window of values just before the most recent one.
            memory = sum(values[-mem_len - 1:-1])
        else:
            # Unbounded memory: everything except the most recent value.
            memory = sum(values[:-1])
        values.append(values[-1] * weight + bias + memory)
    # The slice also covers seq_len < 1, where the seed must be dropped.
    return np.array(values[:seq_len])
# Show how each parameter shapes the generated sequence.
for demo_kwargs in (
    {},
    {'bias': 2},
    {'weight': 2, 'bias': 0},
    {'mem_len': 1, 'bias': 0},
    {'mem_len': -1, 'bias': 0},
):
    print(memory_sum(10, **demo_kwargs))

In [None]:
# Generate data
n_features   = 1
sequence_len = 10
batch_size   = 1

# One long sequence is generated and then cut into `batch_size` rows.
n_entries = batch_size * sequence_len
seq = memory_sum(n_entries, weight=2, mem_len=1, bias=3)
# Alternative sequences to experiment with:
# seq = memory_sum(n_entries, weight=2)
# seq = memory_sum(n_entries, weight=2, mem_len=1)
# seq = memory_sum(n_entries, weight=2, mem_len=-1)

x = np.reshape(seq, (batch_size, sequence_len, n_features))
# Next-step prediction: the targets are the inputs shifted by one step.
x_train, y_train = x[:, :-1, :], x[:, 1:, :]
print(f'{x_train.shape = }; {y_train.shape = }')
for split in (x_train, y_train):
    print(split[0,:5,0], '...', split[0,-2:,0])
# plt.plot(x_train[0,:,:], marker='o')

In [None]:
# Build model
# A SimpleRNN with no activation, so each step is a purely linear
# combination of the current input, the previous output and a bias.
n_input_features  = x_train.shape[-1]
n_output_features = y_train.shape[-1]

inputs = keras.Input(shape=(None, n_input_features))
rnn = keras.layers.SimpleRNN(
    n_output_features,
    activation            = None,
    return_sequences      = True,
    return_state          = False,
    # Scalar constants fill a weight of any shape. A rank-2 tensor would
    # not match the rank-1 bias shape and cannot be serialised in the
    # layer config.
    kernel_initializer    = keras.initializers.Constant(2.0),
    recurrent_initializer = keras.initializers.Constant(1.0),
    bias_initializer      = keras.initializers.Constant(3.0),
)
outputs = rnn(inputs)
model = keras.Model(inputs, outputs)

#opt = keras.optimizers.RMSprop(learning_rate=10e-4)
opt = keras.optimizers.Adam(learning_rate=1e-4)
# `metrics` is documented as a list; a bare string is not accepted by
# every Keras release.
model.compile(optimizer=opt, loss='mse', metrics=['mae'])

# Fit
# epochs=0 requests zero training epochs, so the weights are expected to
# stay at the initial values chosen above.
X = tf.convert_to_tensor(x_train)
y = tf.convert_to_tensor(y_train)
history = model.fit(X, y, epochs=0)

$$y_t = \text{weight} \times x_{t} + \text{recurrent} \times h_{t-1} + \text{bias}$$
$$h_{t-1} = y_{t-1}$$

In [None]:
# Inspect the recurrent layer's parameters: kernel, recurrent kernel, bias.
rnn_layer = model.layers[1]
w = rnn_layer.get_weights()
assert len(w) == 3
weights_report = f'''
weight    = {w[0][0,0]}
recurrent = {w[1][0,0]}
bias      = {w[2][0]}
'''
print(weights_report)

In [None]:
# Evaluate on the first training sequence, restricted to the first feature.
x_test, y_test = x_train[:1,:,:1], y_train[:1,:,:1]
print(x_test.shape)
# Show the last (up to) ten steps of the inputs and of the targets.
for tail_source in (x_test, y_test):
    print(tail_source[0,-10:,0])

In [None]:
# Rebinds X and y to the test tensors; later cells read this X.
X, y = tf.convert_to_tensor(x_test), tf.convert_to_tensor(y_test)
model.evaluate(X, y)

# Metrics recorded from an earlier run. They list two `simple_rnn*` outputs,
# which the single-output model built above does not have -- presumably
# left over from a previous two-output variant; re-run to refresh.
# loss             : 10609411.0000
# simple_rnn_loss  :  1324682.6250
# simple_rnn_1_loss:  9284728.0000
# simple_rnn_mae   :      996.0020
# simple_rnn_1_mae :     2992.0000

In [None]:
# Variant for a layer built with return_state=True (kept for reference):
#y_pred, final_state = model.predict(x_test)
#print(f'Final hidden weight = {final_state[0,0]}')
y_pred = model.predict(X)
abs_err = np.abs(y_test - y_pred)
print('MAE =', abs_err.mean())

In [None]:
# Side-by-side view of the first n steps: input, target, prediction, error.
n = 20
head_true = y_test[0,:n,0]
# NOTE: astype(int) truncates toward zero rather than rounding.
head_pred = y_pred[0,:n,0].astype(int)
print(X[0,:n,0].numpy())
print(head_true)
print(head_pred)
print(head_pred - head_true)

In [None]:
# Overlay the target sequence and the model's prediction.
#plt.plot(x_test[0,:,0], label='Input')
for curve, curve_label in ((y_test, 'True'), (y_pred, 'Pred')):
    plt.plot(curve[0,:,0], label=curve_label)
plt.legend()

In [None]:
# Residual (prediction minus target) at every position of the sequence.
# The horizontal axis is the index along the sequence dimension (axis 1),
# not a training epoch, so it is labelled as a time step.
plt.plot(y_pred[0,:,0] - y_test[0,:,0])
plt.xlabel('Time step')
plt.ylabel('Pred - True')

# NumPy Implementation

In [None]:
# Hand-rolled forward pass of a one-unit linear recurrent cell:
#   output_t = W @ input_t + U @ state_{t-1} + b,   state_t = output_t
inputs = np.reshape([1, 5, 14, 36, 89, 217, 526, 1272, 3073, 7421], (-1, 1))
seq_len, input_features = inputs.shape
output_features = input_features
print(seq_len, input_features)

# Input weight, recurrent weight, bias and the initial (zero) state.
W, U, b = np.array([[2]]), np.array([[1]]), np.array([3])
state_t = np.array([0])

successive_outputs = []
for input_t in inputs:
    output_t = W @ input_t + U @ state_t + b
    print(f"RNN({input_t}) = {np.dot(W, input_t)} + {np.dot(U, state_t)} + {b} = {output_t}")
    successive_outputs.append(output_t)
    # The output is fed back as the state for the next step.
    state_t = output_t
final_output_sequence = np.array(successive_outputs)
# Compare the produced outputs with the inputs shifted by one step.
print(final_output_sequence[:,0])
print(inputs[1:,0])


In [None]:
# Shape of the same values laid out as one row: (1, 10).
np.shape([[1, 5, 14, 36, 89, 217, 526, 1272, 3073, 7421]])