In [1]:
# Assignment 3
# Problem 2
# Recurrent Neural Networks

In [2]:
import torch.nn as nn
class Vanilla_RNN(nn.Module):
    """
    The vanilla RNN: from (x_t,h_t-1) input,hidden-state
        h_t = tanh( R*h_t-1 + A*x_t)
        y_t = B*h_t
     where A is the encoder, B the decoder, R the recurrent matrix
    """
    def __init__(self, input_size, hidden_size, output_size):
        super(Vanilla_RNN, self).__init__()
        self.hidden_size = hidden_size
        self.A = nn.Linear(input_size, hidden_size)
        with torch.no_grad():
            self.A.weight.copy_(torch.tensor([[1, -1, -1/2, 1/2],[1, 1, -1/2, -1]],dtype=torch.long))
        self.R = nn.Linear(hidden_size, hidden_size)
        with torch.no_grad():
            self.R.weight.copy_(torch.tensor([[1,0],[0,1]],dtype=torch.long))
        self.B = nn.Linear(hidden_size, output_size)
        with torch.no_grad():
            self.B.weight.copy_(torch.tensor([[1,1], [1/2, 1], [-1,0], [0, -1/2]],dtype=torch.long))
        self.tanh = nn.Tanh()
    
    def forward(self, x, h):
        # update the hidden state
        h_update = self.tanh( self.R(h) + self.A(x) )
        # prediction
        y = self.B(h_update)
        return y,h_update

    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

In [3]:
ALL_LETTERS ='helo'
NB_LETTERS = 4

In [4]:
import torch
def letterToIndex(letter):
    """ Find letter index from all_letters, e.g. "a" = 0 """
    #print("found: ", ALL_LETTERS.find(letter))
    return ALL_LETTERS.find(letter)

def letterToTensor(letter):
    """ Transform a letter into a 'hot-vector' (tensor) """
    #tensor = torch.zeros(1, NB_LETTERS,dtype=torch.long)
    tensor = torch.zeros(1, NB_LETTERS)
    tensor[0][letterToIndex(letter)] = 1
    return tensor
#print("Embedding of the character 'c':")
#letterToTensor('c')

In [5]:
P = {'n_steps' : 1000,
     'lr' : .005,
     'chunk_len' : 5}
myText='hello'

In [6]:
def predictChars(myRnn, myText, P):
    h = myRnn.init_hidden()
    start_index=0
    end_index=len(myText)-1
    result=""
    for p in range(P['chunk_len']):
        x = letterToTensor( myText[p] )
        y, h = myRnn(x, h)
        print(y)
        print("input: ",myText[p])
        predicted_char = ALL_LETTERS[y.argmax()]
        print("output: ",predicted_char)
        result += predicted_char
    print(str.format("final prediction for 5 chars is {}",result))    

In [7]:
hidden_size = 2
in_out_size = NB_LETTERS
myRnn = Vanilla_RNN(in_out_size,hidden_size,in_out_size)
predictChars(myRnn,myText,P)
match = "olleh"

tensor([[ 1.4481,  1.0917, -0.6069, -0.6729]], grad_fn=<AddmmBackward>)
input:  h
output:  h
tensor([[ 0.8374,  1.1223,  0.0344, -0.6729]], grad_fn=<AddmmBackward>)
input:  e
output:  e
tensor([[ 1.1069,  1.0913, -0.2662, -0.6729]], grad_fn=<AddmmBackward>)
input:  l
output:  h
tensor([[ 1.2844,  1.0890, -0.4459, -0.6729]], grad_fn=<AddmmBackward>)
input:  l
output:  h
tensor([[ 1.1451,  0.8730, -0.5227, -0.6729]], grad_fn=<AddmmBackward>)
input:  o
output:  h
final prediction for 5 chars is hehhh


In [8]:
import random
import pandas as pd
def train_RNN(myRnn,myText,P,shouldPrintTraining=False):
    """
    Train a recurrent neural network from a text ('myText'). The dictionary P
    should contain:
       . the learning rate 'lr'
       . the number of steps 'n_steps'
       . the size of the sentence trained on 'chunk_len'
    """
    # init
    optimizer = torch.optim.Adam(myRnn.parameters(), lr=P['lr'])
    criterion = nn.CrossEntropyLoss()
    df = pd.DataFrame(columns=('step', 'loss'))
    # train
    for step in range(P['n_steps']):
        # A) initialize
        h = myRnn.init_hidden()
        optimizer.zero_grad()
        loss = 0.0
        # B) pick a chunk from the text
        start_index = random.randint(0, len(myText) - P['chunk_len'])
        end_index = start_index + P['chunk_len'] + 1
        chunk = myText[start_index:end_index]
        if (shouldPrintTraining) & (step%50 == 0):
            print(" input  = ", chunk)
            chunk_predicted=""
            #chunk_predicted = chunk[0]
        # C) prediction
        for p in range(P['chunk_len']):
            # init
            x = letterToTensor( chunk[p] )
            #x_next = letterToTensor( chunk[p+1] )
            letter_x_next = letterToIndex(match[p])
            #print(chunk[p+1])
            #print(letter_x_next)
            target = torch.tensor([letter_x_next],dtype=torch.long)
            # prediction
            y, h = myRnn(x, h)
            # loss
            loss += criterion(y.view(1,-1), target)
            if (shouldPrintTraining):
                chunk_predicted += ALL_LETTERS[y.argmax()]
        # D) gradient step
        loss.backward()
        optimizer.step()
        # E) save loss
        ave_loss = loss.detach().numpy() / P['chunk_len']
        if (shouldPrintTraining) & (step%50 == 0):
            print(" output = ", chunk_predicted)
        df.loc[step] = [step, ave_loss]
        if (step%50 == 0):
            # print only once every 50 steps
            print('loss at step ',str(step),' : ', str(ave_loss))
    # result
    return df

In [9]:
myRnn = Vanilla_RNN(in_out_size,hidden_size,in_out_size)
df = train_RNN(myRnn,myText,P,True)

 input  =  hello
 output =  heeel
loss at step  0  :  1.6518274307250977
 input  =  hello
 output =  hllll
loss at step  50  :  1.2891677856445312
 input  =  hello
 output =  hlllh
loss at step  100  :  1.0270138740539552
 input  =  hello
 output =  hlllh
loss at step  150  :  0.8214122772216796
 input  =  hello
 output =  olllh
loss at step  200  :  0.5997264862060547
 input  =  hello
 output =  olleh
loss at step  250  :  0.3920018196105957
 input  =  hello
 output =  olleh
loss at step  300  :  0.26860618591308594
 input  =  hello
 output =  olleh
loss at step  350  :  0.20167596340179444
 input  =  hello
 output =  olleh
loss at step  400  :  0.15901960134506227
 input  =  hello
 output =  olleh
loss at step  450  :  0.12925336360931397
 input  =  hello
 output =  olleh
loss at step  500  :  0.10735998153686524
 input  =  hello
 output =  olleh
loss at step  550  :  0.09068173170089722
 input  =  hello
 output =  olleh
loss at step  600  :  0.0776459038257599
 input  =  hello
 outp

In [10]:
print(myRnn.A.weight)
print(myRnn.B.weight)
print(myRnn.R.weight)

Parameter containing:
tensor([[ 1.4028, -2.0216, -1.0512,  1.1715],
        [-1.1526,  0.1177,  1.6872, -0.1156]], requires_grad=True)
Parameter containing:
tensor([[ 2.5203,  2.5191],
        [-2.3419,  3.3373],
        [-3.0594, -1.7435],
        [ 1.9378, -2.0168]], requires_grad=True)
Parameter containing:
tensor([[ 0.2546,  1.5832],
        [-1.3031,  2.7908]], requires_grad=True)


In [11]:
# define a RNN

#     hidden_size = 2
#     in_out_size = NB_LETTERS
#     A = torch.tensor([[1, -1, -1/2, 1/2],[1, 1, -1/2, -1]])
#     R = torch.tensor([[1,0],[0,1]])
#     B = torch.tensor([[1,1], [1/2, 1], [-1,0], [0, -1/2]])
#     myRnn = Vanilla_RNN(in_out_size,hidden_size,in_out_size)

# test it
#     x_0 = letterToTensor('l')
#     h = myRnn.init_hidden()
#     y_0,h = myRnn(x_0,h)
#     print(y_0)#pick the one letter which has the highest score associated
#     print("input: ",'T')
#     predicted_char = ALL_LETTERS[y_0.argmax()]
#     print("output: ",predicted_char)