In [1]:
#This module implements a vanilla RNN to train an NLP model
#Original source code done by HOUSEZAMETNAT 
#at https://gist.github.com/karpathy/d4dee566867f8291f086
#For slides of basics about Original RNN, refer to: 
# http://cs231n.stanford.edu/slides/2018/cs231n_2018_lecture10.pdf
#Adapted to Python 3

In [1]:
#Note that this module is implemented entirely using numpy, not TENSORFLOW.
import numpy as np

In [2]:
#Illustration: indexing an array with a list of row indices selects those rows
a = np.random.randint(low=0, high=10, size=(10, 1))
print("a:", a)
indx = [1, 4, 5, 7]
# a[indx, :] picks rows 1, 4, 5 and 7 and keeps the column axis
print("a[indx]", a[indx, :])

a: [[8]
 [3]
 [3]
 [8]
 [2]
 [0]
 [0]
 [2]
 [2]
 [4]]
a[indx] [[3]
 [2]
 [0]
 [2]]


In [3]:
#Example of a softmax built from broadcasting:
#exponentiate every entry, then divide by the sum of all exponentials
exp = np.exp(a)
print("Exp:", exp)
y = exp / exp.sum()
print("y:", y)
# Drop the demo variables so they do not linger in the kernel namespace
del y, exp, indx, a

Exp: [[2.98095799e+03]
 [2.00855369e+01]
 [2.00855369e+01]
 [2.98095799e+03]
 [7.38905610e+00]
 [1.00000000e+00]
 [1.00000000e+00]
 [7.38905610e+00]
 [7.38905610e+00]
 [5.45981500e+01]]
y: [[4.90220418e-01]
 [3.30307919e-03]
 [3.30307919e-03]
 [4.90220418e-01]
 [1.21513493e-03]
 [1.64450630e-04]
 [1.64450630e-04]
 [1.21513493e-03]
 [1.21513493e-03]
 [8.97870015e-03]]


In [4]:
#Step 1: Import data (must be a text file)
#, then create a list of indices of unique words in the file
#The words are lower-cased BEFORE the vocabulary is built, so that words that
#differ only by case share one index and vocab_size counts exactly the
#distinct keys of char_to_ix (no unused or duplicated indices).
with open('text.txt','r') as text_file:  # context manager closes the file handle
    data = [word.lower() for word in text_file.read().split()]
#sorted() gives a reproducible word -> index assignment across runs
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print("The text contains %d unique words!"%(vocab_size))
#Create indices (word -> integer index, and the inverse mapping)
char_to_ix = {x:i for i,x in enumerate(chars)}
ix_to_char = {i:x for i,x in enumerate(chars)}

The text contains 83 unique words!


In [5]:
#Define Hyperparameters
#  hidden_size   : number of units in the recurrent hidden state
#  seq_length    : number of words taken from the text per training step
#  learning_rate : step size used in the parameter update
hidden_size, seq_length, learning_rate = 100, 10, 0.1

In [6]:
#Define Model Parameters
#A vanilla RNN has one weight matrix applied to the current input (Wxh), one
#applied to the previous hidden state (Whh) and one mapping the hidden state
#to the output scores (Why). Weights start as small Gaussian noise, biases at zero.
Wxh = 0.01 * np.random.randn(hidden_size, vocab_size)   # input  -> hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
#Wxh and Whh have different shapes; it is the products Wxh.dot(x) and
#Whh.dot(h) that are both (hidden_size, 1) and get added element-wise later
Why = 0.01 * np.random.randn(vocab_size, hidden_size)   # hidden -> output
bh = np.zeros(shape=(hidden_size, 1))  # hidden bias
by = np.zeros(shape=(vocab_size, 1))   # output bias

In [7]:
def lossFun(inputs, targets, hprev):
    '''
        Run one forward and one backward pass of the vanilla RNN over a sequence.

        inputs:  list of integers, the indices of the words fed in at each time step
        targets: list of integers, the index of the word to predict at each time step
                 (indexed in parallel with inputs)
        hprev:   hidden state before the first time step; it is multiplied by Whh
                 and added to bh, so it must be a column compatible with them
        Return:  loss (summed cross-entropy over the sequence), the gradients
                 dWxh, dWhh, dWhy, dbh, dby (each clipped to [-5, 5]) and the
                 last hidden state. For an empty sequence the loss is 0, the
                 gradients are all zero and a copy of hprev is returned.
    '''
    xs, hs, ps = {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0
    #FORWARD PASS: through the entire length of the input vector:
    for t in range(len(inputs)): #use t as subscript for 'time'
        # one-hot encode the input word
        xs[t] = np.zeros((vocab_size,1))
        xs[t][inputs[t]] = 1
        #Recall activation function: current_cell = tanh(W1*current_input + W2*previous_state + bias)
        hs[t] = np.tanh(np.dot(Wxh,xs[t]) + np.dot(Whh,hs[t-1]) + bh)
        y = np.dot(Why,hs[t]) + by # unnormalised scores for the next word
        # Softmax in a numerically stable form: subtracting the max score keeps
        # np.exp from overflowing to inf (which would turn the loss into NaN).
        shifted = y - np.max(y)
        log_norm = np.log(np.sum(np.exp(shifted)))
        ps[t] = np.exp(shifted - log_norm) # probabilities for the next word
        # cross-entropy, computed in log space so a tiny probability cannot give log(0)
        loss += log_norm - shifted[targets[t],0]
    #BACKWARD PASS: accumulate the gradients
    #Each gradient has the same shape as the parameter it belongs to
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(hs[-1]) # hs[-1] always exists, even for an empty sequence
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        dh = np.dot(Why.T, dy) + dhnext # backprop into h
        dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(Whh.T, dhraw)
    # clip in place to mitigate exploding gradients
    for dparam in (dWxh, dWhh, dWhy, dbh, dby):
        np.clip(dparam, -5, 5, out=dparam)
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [8]:
def sample(h, seed_ix, n):
    '''Sample a sequence of n word indices from the network.

    h:       hidden state to start from (column vector compatible with Whh)
    seed_ix: index of the word fed in at the first time step
    n:       number of words to sample
    Return:  list of the n sampled word indices (empty when n is 0)
    '''
    x = np.zeros((vocab_size,1))
    x[seed_ix] = 1
    ixes = []
    assert Wxh.shape[1] == x.shape[0], "Wxh vs x shape mismatched {} vs {}".format(Wxh.shape, x.shape)
    assert Whh.shape[1] == h.shape[0], "Whh vs h shape mismatched {} vs {}".format(Whh.shape, h.shape)
    #Loop through n
    for t in range(n):
        h = np.tanh(np.dot(Wxh,x) + np.dot(Whh,h) + bh)
        y = np.dot(Why,h) + by
        #Find the probability with a numerically stable softmax: subtracting
        #the max score keeps np.exp from overflowing, which would give NaN
        #probabilities and make np.random.choice raise
        p = np.exp(y - np.max(y))
        p /= np.sum(p)
        ix = np.random.choice(vocab_size, p=p.ravel())
        # the sampled word becomes the (one-hot) input of the next step
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes

## Run the training loop over the data


In [9]:
# n counts training iterations, p is the read position inside `data`
n, p = 0, 0
# Adagrad memory: one running sum of squared gradients per parameter
mWxh, mWhh, mWhy = [np.zeros_like(w) for w in (Wxh, Whh, Why)]
mbh, mby = [np.zeros_like(b) for b in (bh, by)]
smooth_loss = seq_length * -np.log(1.0/vocab_size) # loss at iteration 0
counter = 0
while n < 1e4:
    # Sweep the text left to right in steps of seq_length words; on the first
    # iteration, or when the next window would run off the end of the text,
    # reset the hidden state and go back to the start.
    if n == 0 or p + seq_length + 1 >= len(data):
        hprev = np.zeros((hidden_size, 1))
        p = 0
    # Take one extra word so the targets are the inputs shifted by one position
    window = [char_to_ix[ch.lower()] for ch in data[p:p + seq_length + 1]]
    inputs, targets = window[:seq_length], window[1:]
    report = (n % 100 == 0)
    # Every 100 iterations, sample 100 words from the current model
    if report:
        sample_ix = sample(hprev, inputs[0], 100)
        txt = ' '.join(ix_to_char[ix] for ix in sample_ix)
        print('----\n %s \n----'% (txt, ))
    counter += 1 # number of loop passes so far

    # Forward and backward pass over the window; carry the hidden state forward
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = 0.999 * smooth_loss + 0.001 * loss
    if report:
        print('iter %d, loss: %f'% (n, smooth_loss)) # print progress

    # Adagrad update, applied in place so the module-level arrays change
    for param, dparam, mem in ((Wxh, dWxh, mWxh),
                               (Whh, dWhh, mWhh),
                               (Why, dWhy, mWhy),
                               (bh, dbh, mbh),
                               (by, dby, mby)):
        mem += dparam * dparam
        param -= learning_rate * dparam / np.sqrt(mem + 1e-8)

    # move the data pointer and the iteration counter
    p, n = p + seq_length, n + 1


----
 took i'll anymore more to because picture you knees into again heard my me go (anymore, accidentally best just more again swearing into hoping song, yours" ever places know ever off anymore) not time just never you're name and i've (love, mind mean mean am i'd accidentally picture one yours" anymore hoping took you "i'm off swearing off your hear i'll bad? to mind on again time off took i'll so wander we slipped anymore i've feel because (love, places anymore, knees well, accidentally anymore) "i'm you hoping last it (anymore, accidentally last so feel time swearing name i'm insecure 
----
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
iter 0, loss: 44.188411
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)

Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of h

Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of h

Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of h

Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of h

Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of h

Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of h

Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of h

Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of hs: (100, 1)
Shape of h

KeyboardInterrupt: 