In [1]:
import numpy as np
import math
from matplotlib import pyplot as plt

sin_wave = np.array([math.sin(x) for x in np.arange(200)])
X = [] 
Y = []

sequence_len = 50
num_records = len(sin_wave) - sequence_len

for i in range(num_records - 50):
    X.append(sin_wave[i:i+sequence_len])
    Y.append(sin_wave[i + sequence_len])

In [2]:
X = np.array(X)
X = np.expand_dims(X, axis=2)

Y = np.array(X)
Y = np.expand_dims(X, axis=1)

In [3]:
X_val = []
Y_val = []

for i in range(num_records - 50, num_records):
    X_val.append(sin_wave[i:i + sequence_len])
    Y_val.append(sin_wave[i + sequence_len])

X_val = np.array(X_val)
X_val = np.expand_dims(X_val, axis=2)

Y_val = np.array(Y_val)
Y_val = np.expand_dims(Y_val, axis=1)

In [4]:
learning_rate = 0.0001    
nepoch = 25               
T = 50                   # length of sequence
hidden_dim = 100         
output_dim = 1

bptt_truncate = 5
min_clip_value = -10
max_clip_value = 10

#U is the weight matrix for weights between input and hidden layers
#V is the weight matrix for weights between hidden and output layers
#W is the weight matrix for shared weights in the RNN layer (hidden layer)

In [5]:
U = np.random.uniform(0, 1, (hidden_dim, T))
W = np.random.uniform(0, 1, (hidden_dim, hidden_dim))
V = np.random.uniform(0, 1, (output_dim, hidden_dim))

In [6]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [12]:
for epoch in range(nepoch):
    # check loss on train
    loss = 0.0
    
    # do a forward pass to get prediction
    for i in range(Y.shape[0]):
        x, y = X[i], Y[i]                    # get input, output values of each record
        prev_s = np.zeros((hidden_dim, 1))   # here, prev-s is the value of the previous activation of hidden layer; which is initialized as all zeroes
        for t in range(T):
            new_input = np.zeros(x.shape)    # we then do a forward pass for every timestep in the sequence
            new_input[t] = x[t]              # for this, we define a single input for that timestep
            mulu = np.dot(U, new_input)
            mulw = np.dot(W, prev_s)
            add = mulw + mulu
            s = sigmoid(add)
            mulv = np.dot(V, s)
            prev_s = s

    # calculate error 
        loss_per_record = (y - mulv)**2 / 2
        loss += loss_per_record
    loss = loss / float(y.shape[0])
    # check loss on val
    val_loss = 0.0
    for i in range(Y_val.shape[0]):
        x, y = X_val[i], Y_val[i]
        prev_s = np.zeros((hidden_dim, 1))
        for t in range(T):
            new_input = np.zeros(x.shape)
            new_input[t] = x[t]
            mulu = np.dot(U, new_input)
            mulw = np.dot(W, prev_s)
            add = mulw + mulu
            s = sigmoid(add)
            mulv = np.dot(V, s)
            prev_s = s

        loss_per_record = (y - mulv)**2 / 2
        val_loss += loss_per_record
    val_loss = val_loss / float(y.shape[0])

    print('Epoch: ', epoch + 1, ', Loss: ', loss, ', Val Loss: ', val_loss)
    # train model
    for i in range(Y.shape[0]):
        x, y = X[i], Y[i]
    
        layers = []
        prev_s = np.zeros((hidden_dim, 1))
        dU = np.zeros(U.shape)
        dV = np.zeros(V.shape)
        dW = np.zeros(W.shape)
        
        dU_t = np.zeros(U.shape)
        dV_t = np.zeros(V.shape)
        dW_t = np.zeros(W.shape)
        
        dU_i = np.zeros(U.shape)
        dW_i = np.zeros(W.shape)
        
        # forward pass
        for t in range(T):
            new_input = np.zeros(x.shape)
            new_input[t] = x[t]
            mulu = np.dot(U, new_input)
            mulw = np.dot(W, prev_s)
            add = mulw + mulu
            s = sigmoid(add)
            mulv = np.dot(V, s)
            layers.append({'s':s, 'prev_s':prev_s})
            prev_s = s
        # derivative of pred
        dmulv = (mulv - y)
        
        # backward pass
        for t in range(T):
            dV_t = np.dot(dmulv, np.transpose(layers[t]['s']))
            dsv = np.dot(np.transpose(V), dmulv)
            
            ds = dsv
            dadd = add * (1 - add) * ds
            
            dmulw = dadd * np.ones_like(mulw)

            dprev_s = np.dot(np.transpose(W), dmulw)


            for i in range(t-1, max(-1, t-bptt_truncate-1), -1):
                ds = dsv + dprev_s
                dadd = add * (1 - add) * ds

                dmulw = dadd * np.ones_like(mulw)
                dmulu = dadd * np.ones_like(mulu)

                dW_i = np.dot(W, layers[t]['prev_s'])
                dprev_s = np.dot(np.transpose(W), dmulw)

                new_input = np.zeros(x.shape)
                new_input[t] = x[t]
                dU_i = np.dot(U, new_input)
                dx = np.dot(np.transpose(U), dmulu)

                dU_t += dU_i
                dW_t += dW_i
                
            dV += dV_t
            dU += dU_t
            dW += dW_t
            if dU.max() > max_clip_value:
                dU[dU > max_clip_value] = max_clip_value
            if dV.max() > max_clip_value:
                dV[dV > max_clip_value] = max_clip_value
            if dW.max() > max_clip_value:
                dW[dW > max_clip_value] = max_clip_value
                
            
            if dU.min() < min_clip_value:
                dU[dU < min_clip_value] = min_clip_value
            if dV.min() < min_clip_value:
                dV[dV < min_clip_value] = min_clip_value
            if dW.min() < min_clip_value:
                dW[dW < min_clip_value] = min_clip_value
        
        # update
        U -= learning_rate * dU
        V -= learning_rate * dV
        W -= learning_rate * dW

Epoch:  1 , Loss:  [[[127348.78984593]
  [127374.47250441]
  [127393.87453188]
  [127389.63960128]
  [127365.50552237]
  [127343.30889145]
  [127343.90605885]
  [127366.72656432]
  [127390.35830394]
  [127393.45454148]
  [127373.28322862]
  [127347.91420518]
  [127340.95283186]
  [127359.04089644]
  [127385.06609163]
  [127395.26079183]
  [127380.60124462]
  [127354.11490771]
  [127340.17890618]
  [127352.03500046]
  [127378.39993507]
  [127394.92346206]
  [127386.88866971]
  [127361.39803229]
  [127341.64971616]
  [127346.28283506]
  [127370.87386157]
  [127392.4676954 ]
  [127391.66308456]
  [127369.16979571]
  [127345.24099516]
  [127342.26213972]
  [127363.07824584]
  [127388.07720535]
  [127394.56322383]
  [127376.80715018]
  [127350.65089266]
  [127340.31028064]
  [127355.63563794]
  [127382.08336105]
  [127395.37180154]
  [127383.70864962]
  [127357.42894477]
  [127340.59204242]
  [127349.15068544]
  [127374.9445289 ]
  [127394.02852979]
  [127389.34019285]
  [127365.01805218]
 

ValueError: shapes (100,1) and (1,50,1) not aligned: 1 (dim 1) != 50 (dim 1)