<a href="https://colab.research.google.com/github/danjohnvelasco/AI-Playground/blob/master/Recurrent_Neural_Networks_(RNN).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Recurrent Neural Networks

Building a 3-layer neural network, refactoring it into an RNN, improving the RNN by maintaining its state, etc.

This is a way for me to understand RNNs deeply.



In [2]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory usage)
!nvidia-smi

Sun Nov  1 09:41:47 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.32.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   46C    P8    10W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install -U fastai

In [4]:
# fastai is meant to be used with star imports; the names used in the cells below
# (L, tensor, Module, nn, F, DataLoaders, Learner, accuracy, ...) are expected to come from here
from fastai.text.all import *
# Fetch the Human Numbers dataset; `path` points to the local dataset folder
path = untar_data(URLs.HUMAN_NUMBERS)

  return torch._C._cuda_getDeviceCount() > 0


In [5]:
# List the files that make up the dataset
path.ls()

(#2) [Path('/root/.fastai/data/human_numbers/valid.txt'),Path('/root/.fastai/data/human_numbers/train.txt')]

In [6]:
# `L` behaves like a list of items, but it can also be indexed with a list of indices or a mask
lines = L(1,2,5)
lines[[0,2]] # a built-in Python list does not support indexing with a list of indices

(#2) [1,5]

In [7]:
# Gather the lines of both text files into a single collection
# (train.txt first, then valid.txt)
lines = L()
for split_file in ('train.txt', 'valid.txt'):
    with open(path/split_file) as f:
        lines += L(*f.readlines())

lines

(#9998) ['one \n','two \n','three \n','four \n','five \n','six \n','seven \n','eight \n','nine \n','ten \n'...]

In [8]:
# Trim the surrounding whitespace (including the trailing \n) from every line,
# then glue all the lines into one big string with ' . ' between consecutive lines
text = ' . '.join(line.strip() for line in lines)
text[:100]

'one . two . three . four . five . six . seven . eight . nine . ten . eleven . twelve . thirteen . fo'

In [9]:
# Tokenize the dataset by splitting on single spaces;
# the '.' separators inserted by the join become tokens of their own
tokens = text.split(' ')
tokens[:10]

['one', '.', 'two', '.', 'three', '.', 'four', '.', 'five', '.']

## Numericalization

In [10]:
# Build the vocabulary: a list containing every unique token.
# Why not just use a set? A set can be looped over, but it is unordered and has
# no positional index, and each token's position is what becomes its numeric id.
vocab = L(*tokens).unique()
vocab

(#30) ['one','.','two','three','four','five','six','seven','eight','nine'...]

In [11]:
# Example of enumerate:
# enumerate attaches a running counter to an iterable and yields (index, element)
# pairs. This is useful whenever the index of each element is needed, as in
# numericalization.
# Note the unpacking order: the index comes first, the element second.
for i, w in enumerate(vocab[:10]):
    print(i, w)

0 one
1 .
2 two
3 three
4 four
5 five
6 six
7 seven
8 eight
9 nine


In [12]:
# Map every vocabulary token to its position in `vocab`; that position is the token's integer id
word_to_index = {token: idx for idx, token in enumerate(vocab)}
word_to_index

{'.': 1,
 'eight': 8,
 'eighteen': 18,
 'eighty': 26,
 'eleven': 11,
 'fifteen': 15,
 'fifty': 23,
 'five': 5,
 'forty': 22,
 'four': 4,
 'fourteen': 14,
 'hundred': 28,
 'nine': 9,
 'nineteen': 19,
 'ninety': 27,
 'one': 0,
 'seven': 7,
 'seventeen': 17,
 'seventy': 25,
 'six': 6,
 'sixteen': 16,
 'sixty': 24,
 'ten': 10,
 'thirteen': 13,
 'thirty': 21,
 'thousand': 29,
 'three': 3,
 'twelve': 12,
 'twenty': 20,
 'two': 2}

In [13]:
# Numericalize: look each token up in the dictionary, turning the stream of
# strings into the stream of their corresponding integer ids

nums = L(word_to_index[token] for token in tokens)
nums

(#63095) [0,1,2,1,3,1,4,1,5,1...]

# Language Model from scratch

We build a neural network architecture that takes three words as input and returns a prediction of the probability of each possible next word in the vocab.

Jargon: 

hidden state (h): The activations that are updated at each step of a recurrent neural network.



In [14]:
# Build (input, target) pairs: three consecutive token ids form the input and the
# id right after them is the target. The window advances 3 tokens at a time, so
# the inputs do not overlap.
# Inputs are converted to tensors (for training); targets stay plain ints.
seqs = L((tensor(nums[start:start+3]), nums[start+3]) for start in range(0, len(nums)-4, 3))
seqs

(#21031) [(tensor([0, 1, 2]), 1),(tensor([1, 3, 1]), 4),(tensor([4, 1, 5]), 1),(tensor([1, 6, 1]), 7),(tensor([7, 1, 8]), 1),(tensor([1, 9, 1]), 10),(tensor([10,  1, 11]), 1),(tensor([ 1, 12,  1]), 13),(tensor([13,  1, 14]), 1),(tensor([ 1, 15,  1]), 16)...]

In [28]:
bs = 32  # batch size
cut = int(0.8 * len(seqs))  # position where the first 80% of the sequences ends
# Sequential (not random) split: first 80% of the data, then the remaining 20%
train, test = seqs[:cut], seqs[cut:]
# Create the dataloaders; shuffle=False keeps the items in their original order
dls = DataLoaders.from_dsets(train, test, bs=bs, shuffle=False)

In [33]:
# Sanity check: the target (second element) of the first item in the DataLoaders' dataset
dls.dataset[0][1]

1

In [43]:
class LM1(Module): # Module is the same as nn.Module, but there is no need to call super().__init__
    """Language model that predicts the next word from three input words.

    The three steps are written out one by one; every step reuses the same
    embedding (`i_h`) and the same hidden-to-hidden layer (`h_h`).
    """
    # Naming: i == input, h == hidden, o == output
    def __init__(self, vocab_sz, n_hidden):
        """Create the three layers.

        vocab_sz: how many words are in the vocab.
        n_hidden: width of the embeddings and of the hidden activations.
        """
        # Embedding takes word indices as input and returns the embedding vector of each index
        self.i_h = nn.Embedding(vocab_sz, n_hidden) # input -> hidden: one row per vocab word; n_hidden is how wide you decide the embeddings should be
        self.h_h = nn.Linear(n_hidden, n_hidden) # hidden -> hidden. Linear does this: y = xA^T + b, where A == weights, b == bias
        self.h_o = nn.Linear(n_hidden, vocab_sz) # hidden -> output: one raw score (logit) per vocab word; F.cross_entropy, the loss used for training, takes these unnormalized scores

    def forward(self, x):
        """Return one score per vocab word for every item in the batch."""
        # x shape: bs x 3 (the three input words), so x[:,0] means the first word of every item in the batch
        # here, h holds the activations (the hidden state); its shape is bs x n_hidden
        h = F.relu(self.h_h(self.i_h(x[:,0]))) # 1st word --> embedding --> linear --> non-linear
        h = h + self.i_h(x[:,1]) # 2nd word embedding added to the activations of the 1st word
        h = F.relu(self.h_h(h))
        h = h + self.i_h(x[:,2]) # 3rd word embedding added to the activations of the 1st and 2nd words
        h = F.relu(self.h_h(h))
        return self.h_o(h)

In [44]:
# Train LM1 (hidden size 64) for 4 epochs with fit_one_cycle at a learning rate of 1e-3,
# using cross-entropy as the loss and reporting accuracy
learn = Learner(dls, LM1(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.421474,1.829477,0.487045,00:03
1,1.224868,1.838881,0.483242,00:03
2,1.316173,1.558987,0.489185,00:04
3,1.312844,1.590017,0.503447,00:03


# RNN

An RNN is just a refactored version (adding a for loop) of the fully connected neural network above.

In [45]:
class LM2(Module):
    """Same model as LM1, with the per-word steps folded into a for loop.

    Layers: `i_h` (input -> hidden embedding), `h_h` (hidden -> hidden linear),
    `h_o` (hidden -> output linear, one score per vocab word).
    """
    def __init__(self, vocab_sz, n_hidden):
        """Create the layers for a vocab of `vocab_sz` words and a hidden width of `n_hidden`."""
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.h_h = nn.Linear(n_hidden, n_hidden)
        self.h_o = nn.Linear(n_hidden, vocab_sz)

    def forward(self, x):
        """Run one step per input column of `x` and return the scores for the next word."""
        h = 0  # nothing has been seen yet; the first embedding is added to this 0
        # x.shape[1] is how wide the input is (3 here: three input words per item)
        for step in range(x.shape[1]):
            # add the embedding of the current word to the activations so far,
            # then apply linear --> non-linear
            h = F.relu(self.h_h(h + self.i_h(x[:, step])))

        return self.h_o(h)


In [46]:
# Train LM2 with the same settings used for LM1 (hidden size 64, 4 epochs, learning rate 1e-3)
# so that the two sets of results can be compared
learn = Learner(dls, LM2(len(vocab), 64), loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(4, 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.425016,1.924794,0.486808,00:03
1,1.21848,1.903235,0.485857,00:03
2,1.31584,1.65513,0.495603,00:04
3,1.320415,1.608845,0.499406,00:03


# Improving the RNN by maintaining the state