## Elman Networks (RNN)

[Finding Structure in Time](http://psych.colorado.edu/~kimlab/Elman1990.pdf) (1990)

In [2]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# np.random.seed(123)

In [3]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of *x*.

    The computation goes through NumPy, so array inputs are transformed
    element-wise.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def sigmoid_backward(sx):
    """Return the sigmoid derivative expressed via the sigmoid's own output.

    *sx* is an already-computed sigmoid value s; the result is s * (1 - s).
    Passing the raw pre-activation here would give the wrong value.
    """
    complement = 1 - sx
    return sx * complement

In [4]:
# Inputs: ten rows, each holding two binary features.
x = np.array([
    [0, 0],
    [0, 1],
    [1, 1],
    [1, 0],
    [0, 1],
    [1, 1],
    [1, 0],
    [0, 1],
    [1, 1],
    [1, 0],
])
# Targets: one binary label per input row, stored as a (10, 1) column.
y = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1, 0]).reshape(-1, 1)

In [5]:
# Layer widths: the input width is read from the data, the hidden width is
# fixed by hand, and the output width is the number of target columns.
_, encoding_dim = x.shape
hidden_dim = 5
output_dim = y.shape[1]

# Weight matrices, each filled with uniform samples from [0, 1).
# The draw order (w1, w2, w3) is kept so a seeded run yields the same values.
w1 = np.random.random(size=(encoding_dim, hidden_dim))  # input  -> hidden
w2 = np.random.random(size=(hidden_dim, output_dim))    # hidden -> output
w3 = np.random.random(size=(hidden_dim, hidden_dim))    # hidden -> hidden

In [6]:
# Training hyper-parameters and the per-epoch loss history.
epochs = 5000
lr = 0.3
loss = []

# NOTE(review): `context` is allocated here but never read by the loop below,
# so every epoch is a plain feed-forward pass with no recurrent state.
# Confirm whether the hidden-state feedback was meant to be wired in.
context = np.zeros((1, hidden_dim))

for _ in range(epochs):

    # Forward pass over the whole dataset at once
    hidden = sigmoid(x @ w1)
    logits = sigmoid(hidden @ w2)  # already squashed by the sigmoid

    # Backprop - Chain rule
    # sigmoid_backward expects sigmoid outputs, which is what `logits` and
    # `hidden` hold.
    output_error = y - logits  # Prediction error
    dt_do = output_error * sigmoid_backward(logits)  # Target direction

    # Hidden-layer share of the output error, computed with the weights
    # from before this epoch's update.
    hidden_error = dt_do @ w2.T
    dt_dh = hidden_error * sigmoid_backward(hidden)

    # Update weights - Delta rule
    w2 += lr * (hidden.T @ dt_do)
    w1 += lr * (x.T @ dt_dh)

    # Record the mean absolute error as a plain float. Taking the absolute
    # value per sample (rather than of the summed error) stops positive and
    # negative errors from cancelling and hiding wrong predictions.
    loss.append(float(np.abs(output_error).mean()))

In [None]:
# Run the trained network on each sample and print the thresholded result.
# The loop variables are named differently from the dataset arrays so that
# `x` and `y` still hold the full dataset after this cell has run.
for sample, target in zip(x, y):
    h_pred = sigmoid(sample @ w1)
    y_pred = sigmoid(h_pred @ w2)

    # `.item()` pulls the single output out as a Python float; calling int()
    # on a 1-element array directly is deprecated in recent NumPy releases.
    print('Input:', sample, [int(bit > 0.5) for bit in sample], '| Ground Truth:', target, '| Prediction:', int(y_pred.item() > 0.5))

In [None]:
# Plot the recorded loss history against the epoch index.
plt.figure()
plt.plot(loss)
plt.title('MLP Error Decay')
plt.grid(color='0.95', axis='both')  # near-white grey so the grid stays faint
plt.xlabel('Epochs')
plt.ylabel('Loss')