In [48]:
import numpy as np
import torch 
import sys
import torch.nn.functional as F

In [2]:
# Integer ids of the input sequence, one per time step. In the cells shown
# here only the length of this list is used (one random embedding per position).
inputString = [2, 45,30, 55,10]
# Target ids: the input sequence shifted left by one position with 1 appended,
# so the target at step t is the input id at step t+1.
# NOTE(review): 2 and 1 look like start/end markers -- confirm.
outputString = [45, 30, 55, 10, 1]

In [3]:
# Number of rows in each embedding column vector (and columns of Wx).
numFeatures = 100
# Number of rows in each one-hot target and in the softmax output (rows of Wy).
vocabSize = 80

In [5]:
# Build one random embedding per input position: each entry is a
# (numFeatures, 1) column drawn from a standard normal distribution.
embeddings = [np.random.randn(numFeatures, 1) for _ in range(len(inputString))]
    

In [6]:
# Sanity check: each embedding is a (numFeatures, 1) column vector.
embeddings[0].shape

(100, 1)

In [7]:
# Sanity check: there is one embedding per entry of inputString.
len(embeddings)

5

In [8]:
def getOneHot(idx):
    """Return a one-hot column vector for `idx`.

    Args:
        idx: Row index to switch on; used directly as a NumPy index into a
            column of length ``vocabSize``.

    Returns:
        A float NumPy array of shape ``(vocabSize, 1)`` that is zero everywhere
        except for a 1 at row ``idx``.
    """
    column = np.zeros(shape=(vocabSize, 1))
    column[idx] = 1
    return column

In [10]:
# Sanity check: a one-hot vector has vocabSize rows.
print(len(getOneHot(2)))

80


In [11]:
# Show the full one-hot column for index 2 (prints every one of the vocabSize rows).
print(getOneHot(2))

[[0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [14]:
# Size of the hidden state.
numUnits = 50

# Initial hidden state: an all-zero float64 column of shape (numUnits, 1).
h0 = torch.zeros((numUnits, 1), dtype=torch.float64)

# Trainable weights, each drawn uniformly from [0, 1) and tracked by autograd.
# Wh maps hidden -> hidden, Wx maps input -> hidden, Wy maps hidden -> vocabulary.
Wh = torch.tensor(np.random.uniform(low=0, high=1, size=(numUnits, numUnits)), requires_grad=True)
Wx = torch.tensor(np.random.uniform(low=0, high=1, size=(numUnits, numFeatures)), requires_grad=True)
Wy = torch.tensor(np.random.uniform(low=0, high=1, size=(vocabSize, numUnits)), requires_grad=True)

In [15]:
# Sanity check on the parameter shapes: hidden-hidden, input-hidden, hidden-vocab, initial state.
print(Wh.shape, Wx.shape, Wy.shape, h0.shape)

torch.Size([50, 50]) torch.Size([50, 100]) torch.Size([80, 50]) torch.Size([50, 1])


In [20]:
def stepForward(xt, Wx, Wh, Wy, prevMemory):
    """Run one time step of the RNN.

    Computes ``ht = tanh(Wx @ xt + Wh @ prevMemory)`` and
    ``yt_hat = softmax(Wy @ ht)`` along dimension 0.

    Args:
        xt: NumPy array holding the input for this step; it is converted with
            ``torch.from_numpy``, so its dtype must match the weights.
        Wx: Input-to-hidden weight tensor.
        Wh: Hidden-to-hidden weight tensor.
        Wy: Hidden-to-output weight tensor.
        prevMemory: Hidden-state tensor from the previous step.

    Returns:
        Tuple ``(ht, yt_hat)``: the new hidden state and the softmax
        distribution over output rows.
    """
    inputTensor = torch.from_numpy(xt)
    preActivation = Wx @ inputTensor + Wh @ prevMemory
    ht = torch.tanh(preActivation)
    logits = Wy @ ht
    return ht, F.softmax(logits, dim=0)

In [21]:
# Smoke test: run a single step on the first embedding, starting from the zero state h0.
ht, yt_hat = stepForward(embeddings[0],Wx,Wh,Wy,h0)

In [22]:
# The hidden state should be a (numUnits, 1) column.
ht.shape

torch.Size([50, 1])

In [23]:
# The prediction should be a (vocabSize, 1) column.
yt_hat.shape

torch.Size([80, 1])

In [24]:
# The softmax output is a probability distribution, so its entries should sum to 1.
yt_hat.sum()

tensor(1.0000, dtype=torch.float64, grad_fn=<SumBackward0>)

In [31]:
def fullForwardRNN(X, Wx, Wh, Wy, prevMemory):
    """Run the RNN over a whole sequence.

    Calls ``stepForward`` once per element of ``X``, feeding each step's
    hidden state into the next step.

    Args:
        X: Sequence of per-step inputs, each accepted by ``stepForward``.
        Wx: Input-to-hidden weight tensor.
        Wh: Hidden-to-hidden weight tensor.
        Wy: Hidden-to-output weight tensor.
        prevMemory: Hidden state used for the first step.

    Returns:
        List with one ``yt_hat`` prediction per element of ``X``
        (empty when ``X`` is empty).
    """
    predictions = []
    hidden = prevMemory
    for xt in X:
        hidden, stepProbs = stepForward(xt, Wx, Wh, Wy, hidden)
        predictions.append(stepProbs)
    return predictions

In [34]:
# Forward pass over the whole embedded sequence, starting from the zero state h0.
y_hat = fullForwardRNN(embeddings,Wx,Wh,Wy,h0)

In [35]:
# There should be one prediction per time step.
len(y_hat)

5

In [36]:
# Each prediction should be a (vocabSize, 1) column.
y_hat[0].shape

torch.Size([80, 1])

In [40]:
def computeLoss(y,y_hat):
    """Sum the base-2 negative log-likelihood over all time steps.

    For every (target, prediction) pair, picks the predicted entries where the
    target equals 1 and adds ``-log2`` of them to the total.

    Args:
        y: Sequence of one-hot targets, one per time step.
        y_hat: Sequence of prediction tensors aligned with ``y``.

    Returns:
        The summed loss as a tensor, or the integer 0 when there are no
        pairs. Pairing stops at the shorter of the two sequences.
    """
    stepLosses = (
        -torch.log2(predicted[target == 1])
        for target, predicted in zip(y, y_hat)
    )
    # sum() starts from 0 and adds left to right, the same order as a manual loop.
    return sum(stepLosses)

In [41]:
# One-hot encode every target id in outputString, one column per time step.
y = [getOneHot(idx) for idx in outputString]


In [42]:
# Loss of the forward pass computed above, before the training cell below is run.
print(computeLoss(y,y_hat))

tensor([42.5490], dtype=torch.float64, grad_fn=<AddBackward0>)


In [52]:
def updateParams(Wx,Wh,Wy,dWx, dWh,dWy, lr):
    """Apply one gradient-descent step to the three weight tensors in place.

    Args:
        Wx, Wh, Wy: Weight tensors to update.
        dWx, dWh, dWy: Gradients matching ``Wx``, ``Wh`` and ``Wy``.
        lr: Learning rate that scales each gradient.

    Returns:
        The same three tensors ``(Wx, Wh, Wy)`` after the update.
    """
    # no_grad keeps the in-place updates out of the autograd graph.
    with torch.no_grad():
        for weight, grad in ((Wx, dWx), (Wh, dWh), (Wy, dWy)):
            weight -= lr * grad
    return Wx, Wh, Wy

In [53]:
def trainRnn(X, y, Wx, Wh, Wy, prevMemory, lr, nepoch):
    """Train the RNN weights with plain gradient descent.

    Each epoch runs ``fullForwardRNN`` over ``X``, computes the loss with
    ``computeLoss``, back-propagates, applies one ``updateParams`` step and
    then zeroes the accumulated gradients. The weight tensors are updated
    in place.

    Args:
        X: Sequence of per-step inputs passed to ``fullForwardRNN``.
        y: Sequence of one-hot targets passed to ``computeLoss``.
        Wx, Wh, Wy: Weight tensors; ``.grad`` is read after ``backward()``, so
            they must be tensors that require gradients.
        prevMemory: Initial hidden state used at the start of every epoch.
        lr: Learning rate for the update step.
        nepoch: Number of epochs to run.

    Returns:
        Tuple ``(Wx, Wh, Wy, losses)`` where ``losses`` holds one detached
        loss tensor per epoch.
    """
    losses = []
    for epoch in range(nepoch):
        y_hat = fullForwardRNN(X, Wx,Wh,Wy,prevMemory)
        loss = computeLoss(y, y_hat)
        loss.backward()
        # Store a detached tensor: appending `loss` itself would keep every
        # epoch's autograd graph reachable for as long as the list lives.
        losses.append(loss.detach())
        print("Loss after epoch = %d: %f" %(epoch,loss))
        sys.stdout.flush()
        # Read the gradients directly; the `.data` indirection is unnecessary here.
        Wx,Wh,Wy = updateParams(Wx,Wh,Wy,Wx.grad, Wh.grad,Wy.grad, lr)
        # backward() accumulates into .grad, so reset before the next epoch.
        for weight in (Wx, Wh, Wy):
            weight.grad.zero_()
    return Wx, Wh, Wy, losses


In [54]:
# Train for 100 epochs with learning rate 0.001, starting each epoch from h0;
# Wx, Wh and Wy are rebound to the updated weights.
Wx, Wh, Wy, losses = trainRnn(embeddings, y, Wx, Wh, Wy, h0, 0.001, 100)

Loss after epoch = 0: 41.389683
Loss after epoch = 1: 39.686380
Loss after epoch = 2: 39.141189
Loss after epoch = 3: 38.602741
Loss after epoch = 4: 38.070988
Loss after epoch = 5: 37.545886
Loss after epoch = 6: 37.027390
Loss after epoch = 7: 36.515458
Loss after epoch = 8: 36.010044
Loss after epoch = 9: 35.511102
Loss after epoch = 10: 35.018583
Loss after epoch = 11: 34.532435
Loss after epoch = 12: 34.052606
Loss after epoch = 13: 33.579040
Loss after epoch = 14: 33.111681
Loss after epoch = 15: 32.650474
Loss after epoch = 16: 32.195362
Loss after epoch = 17: 31.746290
Loss after epoch = 18: 31.303204
Loss after epoch = 19: 30.866054
Loss after epoch = 20: 30.434790
Loss after epoch = 21: 30.009369
Loss after epoch = 22: 29.589753
Loss after epoch = 23: 29.175906
Loss after epoch = 24: 28.767803
Loss after epoch = 25: 28.365423
Loss after epoch = 26: 27.968755
Loss after epoch = 27: 27.577797
Loss after epoch = 28: 27.192559
Loss after epoch = 29: 26.813059
Loss after epoch = 3