In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable

import time
import math

# True when a CUDA device is available; later cells check this flag before
# moving the batched data, the model and the sampling input to the GPU.
use_cuda = torch.cuda.is_available()

In [None]:
class LMConfiguration(object):
    """Default hyperparameters for the RNN language model and its training.

    Every setting is a class attribute, so a fresh instance exposes these
    defaults and individual values can be overridden per instance.
    """

    # ---- model architecture ----
    # Recurrent cell name; RNNModel accepts 'LSTM' or 'GRU'.
    rnn_type = 'LSTM'
    # Number of stacked recurrent layers.
    n_layers = 2
    # Embedding width and recurrent state width; they have to match while
    # tied_weights is enabled.
    embedding_dim = hidden_dim = 200
    # Share one weight matrix between the embedding and the output layer.
    tied_weights = True
    # Dropout probability handed to nn.Dropout and to the recurrent layer.
    dropout = 0.5
    # Placeholder vocabulary size; the notebook replaces it with the real
    # dictionary size before the model is built.
    vocab_size = 5000

    # ---- training ----
    # Upper bound on the number of time steps in one training chunk.
    max_len = 30
    # Initial step size for the hand-written SGD update.
    learning_rate = 20

In [None]:
class RNNModel(nn.Module):
    """Recurrent language model: embedding -> stacked LSTM/GRU -> linear decoder.

    Dropout is applied to the embedded inputs and again to the recurrent
    outputs. When ``config.tied_weights`` is true the decoder reuses the
    embedding matrix as its weight, which requires
    ``embedding_dim == hidden_dim``.
    """

    def __init__(self, config):
        """Build the layers described by ``config``.

        Args:
            config: object exposing ``rnn_type``, ``vocab_size``,
                ``embedding_dim``, ``hidden_dim``, ``n_layers``, ``dropout``
                and ``tied_weights``.

        Raises:
            ValueError: if ``rnn_type`` is neither 'LSTM' nor 'GRU', or if
                weights are tied while ``embedding_dim != hidden_dim``.
        """
        super(RNNModel, self).__init__()

        rnn_type = config.rnn_type
        vocab_size = config.vocab_size
        embedding_dim = config.embedding_dim
        hidden_dim = config.hidden_dim
        tied_weights = config.tied_weights

        # Validate first so a bad configuration fails before any
        # vocabulary-sized layer is allocated.
        if rnn_type not in ['LSTM', 'GRU']:
            raise ValueError("""'rnn_type' error, use ['LSTM', 'GRU']""")
        if tied_weights and embedding_dim != hidden_dim:
            raise ValueError('When using the tied flag, embedding_dim must be equal to hidden_dim')

        self.rnn_type = rnn_type
        self.hidden_dim = hidden_dim
        self.n_layers = config.n_layers

        # Registration order (drop, encoder, rnn, decoder) is kept as-is:
        # init_hidden() relies on next(self.parameters()).
        self.drop = nn.Dropout(config.dropout)
        self.encoder = nn.Embedding(vocab_size, embedding_dim)
        # nn.LSTM / nn.GRU looked up by name; both take the same arguments.
        self.rnn = getattr(nn, rnn_type)(embedding_dim, hidden_dim, self.n_layers,
                                         dropout=config.dropout)
        self.decoder = nn.Linear(hidden_dim, vocab_size)

        if tied_weights:
            # Both layers now point at the same Parameter object.
            self.decoder.weight = self.encoder.weight

        self.init_weights()

    def forward(self, inputs, hidden):
        """Run one chunk through the network.

        Args:
            inputs: token indices; the recurrent layer is built without
                ``batch_first``, so the layout is (seq_len, batch).
            hidden: recurrent state as produced by ``init_hidden`` or by a
                previous call.

        Returns:
            ``(logits, hidden)`` where ``logits`` has shape
            (seq_len, batch, vocab_size) and ``hidden`` is the updated state.
        """
        embedded = self.drop(self.encoder(inputs))
        output, hidden = self.rnn(embedded, hidden)
        output = self.drop(output)
        seq_len, bsz, n_features = output.size()
        # nn.Linear is applied to a 2-D view, then the time/batch axes are restored.
        decoded = self.decoder(output.view(seq_len * bsz, n_features))
        return decoded.view(seq_len, bsz, decoded.size(1)), hidden

    def init_weights(self):
        """Re-initialise embedding/decoder weights uniformly in [-0.1, 0.1] and zero the decoder bias."""
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        # With tied weights this re-draws the same shared tensor.
        self.decoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)

    def init_hidden(self, bsz):
        """Return an all-zero initial state for a batch of ``bsz`` sequences.

        The state is created from an existing parameter tensor so it has the
        same tensor type as the model. For an LSTM a ``(h0, c0)`` tuple is
        returned, for a GRU a single ``h0``; each has shape
        (n_layers, bsz, hidden_dim).
        """
        weight = next(self.parameters()).data
        shape = (self.n_layers, bsz, self.hidden_dim)
        if self.rnn_type == 'LSTM':
            # LSTM h0, c0
            return (Variable(weight.new(*shape).zero_()),
                    Variable(weight.new(*shape).zero_()))
        # GRU h0
        return Variable(weight.new(*shape).zero_())

In [None]:
def batchify(data, bsz):
    """Lay a 1-D sequence out as ``bsz`` parallel columns.

    The sequence is cut into ``bsz`` equally long consecutive pieces (any
    leftover tail that does not fill all pieces is dropped) and each piece
    becomes one column of the result, giving shape (len(data) // bsz, bsz).
    The result is moved to the GPU when the module-level ``use_cuda`` is set.
    """
    rows_per_column = data.size(0) // bsz
    # Discard the remainder so the data splits evenly.
    trimmed = data.narrow(0, 0, rows_per_column * bsz)
    # One row per piece, then transpose so each piece runs down a column.
    columns = trimmed.view(bsz, -1).t().contiguous()
    return columns.cuda() if use_cuda else columns

In [None]:
from data_zh import *

In [None]:
# Build the corpus from the text file. `Corpus` is not defined in this
# notebook; presumably it is provided by the `data_zh` star import (confirm).
# Later cells read `corpus.dictionary` and `corpus.train` from this object.
corpus = Corpus('data/sanguoyanyi.txt')

In [None]:
# Sanity check: dictionary size (used below as the model's vocabulary size)
# and the length of the training data.
print(len(corpus.dictionary))
print(len(corpus.train))

In [None]:
# Number of parallel columns the training data is split into by batchify().
train_batch_size = 20
# Default hyperparameters; vocab_size is overwritten in a later cell.
config = LMConfiguration()
# Training data arranged as train_batch_size columns (on the GPU when available).
train_data = batchify(corpus.train, train_batch_size)

In [None]:
# Shape of the batched training data: (rows per column, train_batch_size).
print(train_data.size())

In [None]:
# Replace the placeholder vocabulary size with the real dictionary size
# before building the model, so the embedding and decoder match the corpus.
config.vocab_size = len(corpus.dictionary)
model = RNNModel(config)

In [None]:
# Move the model to the GPU when CUDA is available, matching batchify().
if use_cuda:
    model.cuda()

In [None]:
# Loss used by train(): cross-entropy between the flattened logits and the
# flattened next-token targets.
criterion = nn.CrossEntropyLoss()

In [None]:
def get_batch(source, i, evaluation=False):
    """Cut one input/target chunk out of batched data.

    Args:
        source: batched data as returned by ``batchify``.
        i: index of the first row of the chunk.
        evaluation: forwarded as ``volatile`` for the input Variable.

    Returns:
        ``(data, target)``: ``data`` holds up to ``config.max_len`` rows
        starting at ``i``; ``target`` holds the same rows shifted down by one,
        flattened to 1-D. The chunk is shortened near the end of ``source``
        so the shifted rows always exist.
    """
    rows_left = len(source) - 1 - i
    n_rows = min(config.max_len, rows_left)
    stop = i + n_rows
    inputs = Variable(source[i:stop], volatile=evaluation)
    shifted = source[i + 1:stop + 1]
    return inputs, Variable(shifted.view(-1))

In [None]:
def repackage_hidden(h):
    """Detach a hidden state from the computation graph that produced it.

    Args:
        h: a single state Variable, or an arbitrarily nested tuple/iterable
            of them (for example the ``(h, c)`` pair of an LSTM).

    Returns:
        The same values wrapped in fresh Variables with no history; nested
        containers are returned as tuples.
    """
    # isinstance instead of an exact type comparison: on PyTorch versions
    # where Tensor and Variable are merged, type(h) is never Variable, so the
    # old check fell through and tried to iterate the tensor element by element.
    if isinstance(h, Variable):
        return Variable(h.data)
    return tuple(repackage_hidden(v) for v in h)

In [None]:
def train():
    """Run one pass over ``train_data`` with truncated BPTT and plain SGD.

    Works entirely on module-level state: ``model``, ``criterion``,
    ``config``, ``train_data`` and ``train_batch_size``, plus ``lr`` and
    ``epoch``, which the calling cell must define before calling this
    function. Every ``print_per_batch`` batches the mean loss and perplexity
    since the last report are printed; every ``sample_per_batch`` batches a
    sample from ``generate()`` is printed.
    """
    print_per_batch = 200    # reporting interval, also the divisor of the running mean
    sample_per_batch = 1000  # how often a text sample is printed

    model.train()
    total_loss = 0.0
    start_time = time.time()
    hidden = model.init_hidden(train_batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, config.max_len)):
        data, targets = get_batch(train_data, i)

        # Carry the state values across chunks but cut the graph, so
        # backward() only covers the current chunk.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, config.vocab_size), targets)
        loss.backward()

        # Clip the global gradient norm before the update.
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)

        # Manual SGD step: p <- p - lr * grad.
        for p in model.parameters():
            if p.grad is None:
                # Parameter took no part in this forward pass.
                continue
            p.data.add_(-lr, p.grad.data)

        # Accumulate as a Python float; works for 1-element and 0-dim losses.
        total_loss += float(loss.data.view(-1)[0])

        if batch % print_per_batch == 0 and batch > 0:
            cur_loss = total_loss / print_per_batch
            elapsed = time.time() - start_time
            msg = '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | loss {:5.2f} | ppl {:8.2f} |'
            print(msg.format(epoch, batch, len(train_data) // config.max_len, lr,
                             elapsed * 1000 / print_per_batch, cur_loss, math.exp(cur_loss)))
            total_loss = 0.0
            start_time = time.time()

        if batch % sample_per_batch == 0 and batch > 0:
            # Sample with dropout disabled, then return to training mode.
            model.eval()
            word_list = generate()
            model.train()
            print(''.join(word_list))

In [None]:
def generate(word_len=100, temperature=1.0):
    """Sample ``word_len`` tokens from the module-level ``model``.

    Sampling starts from a uniformly random token index and feeds each
    sampled token back as the next input. The model is switched to eval mode
    while sampling (so dropout is off) and put back into whatever mode it
    was in before the call.

    Args:
        word_len: number of tokens to sample.
        temperature: divisor applied to the logits before exponentiation;
            1.0 samples from the model's own distribution, smaller values
            sharpen it, larger values flatten it.

    Returns:
        List of sampled words looked up in ``corpus.dictionary.idx2word``.

    Raises:
        ValueError: if ``temperature`` is not positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    was_training = model.training
    model.eval()
    try:
        # Random start token, shape (1, 1): one time step, batch of one.
        inputs = Variable(torch.rand(1, 1).mul(config.vocab_size).long(), volatile=True)
        if use_cuda:
            inputs.data = inputs.data.cuda()
        hidden = model.init_hidden(1)
        word_list = []
        for _ in range(word_len):
            output, hidden = model(inputs, hidden)
            # Unnormalised sampling weights; multinomial normalises them.
            word_weights = output.squeeze().data.div(temperature).exp().cpu()
            # int() gives a plain index whether indexing returns a Python
            # number or a 0-dim tensor.
            word_idx = int(torch.multinomial(word_weights, 1)[0])
            inputs.data.fill_(word_idx)
            word_list.append(corpus.dictionary.idx2word[word_idx])
        return word_list
    finally:
        # Restore the caller's mode even if sampling fails.
        model.train(was_training)

In [None]:
# Sample from the model as it stands; when cells are run top to bottom this
# happens before the training cell below, so the weights are still the
# initial ones.
generate()

In [None]:
# train() reads `lr` and `epoch` as module-level globals, so these two names
# must stay exactly as they are.
lr = config.learning_rate
# range(1, 2) runs a single epoch (epoch == 1).
for epoch in range(1, 2):
    train()
    # Divide the learning rate by 4 after every epoch, unconditionally.
    lr /= 4

In [None]:
# Scratch cell: rebuild by hand the starting state that generate() uses, to
# inspect a single forward step. A random token index of shape (1, 1) is the
# input; the hidden state is a fresh zero state for a batch of one.
inputs = Variable(torch.rand(1, 1).mul(config.vocab_size).long(), volatile=True)
if use_cuda:
    inputs.data = inputs.data.cuda()
hidden = model.init_hidden(1)
# Not filled in this cell.
word_list = []
print(inputs)

In [None]:
# One forward step on the start token: `output` holds the logits with shape
# (1, 1, vocab_size), `hidden` the updated recurrent state.
output, hidden = model(inputs, hidden)

In [None]:
# Display the logits produced by the single step above.
output