# RNN Language Model

In [55]:
import torch
import torch.nn as nn
import numpy as np
from torch.nn.utils import clip_grad_norm_

In [56]:
#https://github.com/pytorch/examples/tree/master/word_language_model 
class Dictionary(object):
    """Two-way vocabulary: word -> integer id and integer id -> word.

    Ids are assigned in first-seen order, starting at 0.
    """

    def __init__(self):
        self.word2idx = {}  # word -> id
        self.idx2word = {}  # id -> word
        self.idx = 0        # id the next unseen word will receive

    def add_word(self, word):
        """Register ``word`` under the next free id; known words are left untouched."""
        if word in self.word2idx:
            return
        new_id = self.idx
        self.word2idx[word] = new_id
        self.idx2word[new_id] = word
        self.idx = new_id + 1

    def __len__(self):
        """Return the number of distinct words registered so far."""
        return len(self.word2idx)


class Corpus(object):
    """Turns a whitespace-tokenized text file into a batched tensor of word ids."""

    def __init__(self):
        self.dictionary = Dictionary()

    def get_data(self, path, batch_size=20, encoding=None):
        """Tokenize the file at ``path`` and fold the token stream into rows.

        Every line is split on whitespace and terminated with an ``'<eos>'``
        token. Unseen words are added to ``self.dictionary`` as they appear.

        Args:
            path: Path of the text file to read.
            batch_size: Number of rows in the returned tensor.
            encoding: Text encoding passed to ``open``; ``None`` keeps the
                platform default.

        Returns:
            A ``torch.long`` tensor of shape
            ``(batch_size, num_tokens // batch_size)``. Trailing tokens that do
            not fill a complete column are dropped; a file with fewer than
            ``batch_size`` tokens yields a ``(batch_size, 0)`` tensor.
        """
        # Single pass: ids are handed out in first-seen order, so looking a
        # word up right after registering it gives the same id as a second
        # pass over the file would.
        token_ids = []
        with open(path, 'r', encoding=encoding) as f:
            for line in f:
                for word in line.split() + ['<eos>']:
                    self.dictionary.add_word(word)
                    token_ids.append(self.dictionary.word2idx[word])

        ids = torch.tensor(token_ids, dtype=torch.long)
        num_batches = ids.size(0) // batch_size
        ids = ids[:num_batches*batch_size]
        # Explicit column count instead of -1: -1 cannot be inferred when
        # there are zero elements left after trimming.
        return ids.view(batch_size, num_batches)

## Hyper-parameters

In [57]:
embed_size = 128       # width of each word embedding vector (nn.Embedding output)
hidden_size = 1024     # width of the LSTM hidden and cell states
num_layers = 1         # number of stacked LSTM layers
num_epochs = 3         # passes over the corpus in the training loop
num_samples = 500      # number of words generated in the sampling example
batch_size = 20        # rows the token stream is folded into by Corpus.get_data
seq_length = 30        # time steps per training window
learning_rate = 0.002  # learning rate handed to the Adam optimizer

In [58]:
# Tokenize the corpus into a (batch_size, tokens_per_row) tensor of word ids.
corpus = Corpus()
# To train on a different corpus, swap the path below (e.g. 'anna.txt').
ids = corpus.get_data('reviews.txt', batch_size)
vocab_size = len(corpus.dictionary)
# Number of seq_length-sized windows per row; shown in the training progress log.
num_batches = ids.size(1) // seq_length

## Language Model

In [59]:
class RNNLM(nn.Module):
    """LSTM language model: embedding -> LSTM -> linear projection to vocabulary logits."""

    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(
            input_size=embed_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, h):
        """Run one chunk of word ids through the network.

        Args:
            x: Word-id tensor laid out batch-first, i.e. (batch, steps).
            h: ``(hidden, cell)`` state pair fed to the LSTM.

        Returns:
            ``(logits, (hidden, cell))`` where ``logits`` has one row per
            (batch, step) position, shape ``(batch * steps, vocab_size)``.
        """
        embedded = self.embed(x)
        seq_out, (h_n, c_n) = self.lstm(embedded, h)
        # Collapse batch and time so the projection sees one row per token.
        rows, steps, width = seq_out.shape
        logits = self.linear(seq_out.reshape(rows * steps, width))
        return logits, (h_n, c_n)

In [60]:
# Instantiate the language model from the hyper-parameters.
model = RNNLM(
    vocab_size=vocab_size,
    embed_size=embed_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

In [61]:
# Mean cross-entropy over the vocabulary logits, minimized with Adam.
criterion = nn.CrossEntropyLoss(reduction='mean')
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [62]:
def detach(states):
    """Return the given tensors as a list, each detached from its autograd history."""
    detached = []
    for tensor in states:
        detached.append(tensor.detach())
    return detached

In [63]:
for epoch in range(num_epochs):
    # Every epoch starts from an all-zero (hidden, cell) state pair.
    states = tuple(torch.zeros(num_layers, batch_size, hidden_size) for _ in range(2))

    # Stop one window early so the shifted targets always have seq_length columns.
    for i in range(0, ids.size(1) - seq_length, seq_length):
        # Targets are the inputs shifted one token to the right.
        inputs = ids[:, i:i + seq_length]
        targets = ids[:, i + 1:i + 1 + seq_length]

        # Forward pass; cut the state from earlier windows so back-propagation
        # stops at the start of this window.
        states = detach(states)
        outputs, states = model(inputs, states)
        loss = criterion(outputs, targets.reshape(-1))

        # Backward pass with gradient-norm clipping, then the optimizer step.
        model.zero_grad()
        loss.backward()
        clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        # Report progress every 100th step only.
        step = (i+1) // seq_length
        if step % 100 != 0:
            continue
        print('Epoch [{}/{}], Step[{}/{}], Loss: {:.4f}, Perplexity: {:5.2f}'.format(
            epoch+1, num_epochs, step, num_batches, loss.item(), np.exp(loss.item())))

Epoch [1/3], Step[0/324], Loss: 10.0585, Perplexity: 23353.19
Epoch [1/3], Step[100/324], Loss: 7.2074, Perplexity: 1349.44
Epoch [1/3], Step[200/324], Loss: 6.4695, Perplexity: 645.14
Epoch [1/3], Step[300/324], Loss: 6.8140, Perplexity: 910.52
Epoch [2/3], Step[0/324], Loss: 6.1222, Perplexity: 455.87
Epoch [2/3], Step[100/324], Loss: 5.7712, Perplexity: 320.93
Epoch [2/3], Step[200/324], Loss: 5.1787, Perplexity: 177.45
Epoch [2/3], Step[300/324], Loss: 5.3867, Perplexity: 218.49
Epoch [3/3], Step[0/324], Loss: 4.9553, Perplexity: 141.93
Epoch [3/3], Step[100/324], Loss: 4.6535, Perplexity: 104.95
Epoch [3/3], Step[200/324], Loss: 4.0111, Perplexity: 55.21
Epoch [3/3], Step[300/324], Loss: 3.8243, Perplexity: 45.80


In [64]:
# Save only the model's state_dict (its parameters), not the module object itself.
torch.save(model.state_dict(), 'model1.ckpt')

## Examples

### The first word is chosen at random; the following words are sampled from the model's predictions

In [69]:
# Generate num_samples words: start from a uniformly random word id, then
# repeatedly sample the next word from the model and feed it back as input.
prediction_words = ""
state = (torch.zeros(num_layers, 1, hidden_size), torch.zeros(num_layers, 1, hidden_size))
# Uniform weights over the vocabulary pick the seed word.
prob = torch.ones(vocab_size)
input = torch.multinomial(prob, num_samples=1).unsqueeze(1)

# Inference only: without no_grad() the recurrent state would keep the
# autograd graph of every earlier step alive for the whole generation run.
with torch.no_grad():
    for i in range(num_samples):
        output, state = model(input, state)
        # softmax gives the same distribution as normalizing exp(logits),
        # but cannot overflow to inf for large logits.
        prob = torch.softmax(output, dim=1)
        word_id = torch.multinomial(prob, num_samples=1).item()
        # Reuse the (1, 1) input tensor for the next step.
        input.fill_(word_id)
        word = corpus.dictionary.idx2word[word_id]
        # End-of-sentence markers become line breaks in the generated text.
        word = '\n' if word == '<eos>' else word + ' '
        prediction_words = prediction_words + word

prediction_words

'flavors to it spa with a side of mine! and that its Chinese! Service was Chinese, (Thanks Owen!), when it is beter than out, shoping, you may have skewed The decor is rare and their cheseburger.. sauce were oh-so-god. special, round. Seriously, I highly recomend Whitey I highly recomend limón amarilo and have order to ben Tony packed, very big and lots of staf. charge for the crust. Ad the Nordstrom to the top notch Another there are many more satisfying. That from traditional So you can pul Touch of the fod. knowledgeable a sample extravaganza and then the waiter subsequently the wonderful things of host for my years. The 18 is wel Granted, and with the stay Their bers are great. \nMy husband and I recently I could eat here (198) and once al true. talking, I wil start this place alot. Yelp! months Shoe \nTried $ Pan casual folks, lengthwise, Turkish National of membership for me. Again, absolute delight in was our first impresion did not se what it was the same time. Nothing though, 