In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

import time
import math

use_cuda = torch.cuda.is_available()

In [2]:
class LMConfiguration(object):
    rnn_type = 'LSTM'
    vocab_size = 5000
    embedding_dim = 200
    hidden_dim = 200
    n_layers = 2
    dropout = 0.5
    tied_weights = True
    
    max_len = 30
    learning_rate = 20

In [3]:
class RNNModel(nn.Module):
    def __init__(self, config):
        super(RNNModel, self).__init__()
        
        dropout = config.dropout
        vocab_size = config.vocab_size
        embedding_dim = config.embedding_dim
        tied_weights = config.tied_weights
        
        self.hidden_dim = hidden_dim = config.hidden_dim
        self.rnn_type = rnn_type = config.rnn_type
        self.n_layers = n_layers = config.n_layers
        
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(vocab_size, embedding_dim)
        
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(embedding_dim, hidden_dim, n_layers, dropout=dropout)
        else:
            raise ValueError("""'rnn_type' error, use ['LSTM', 'GRU']""")
            
        self.decoder = nn.Linear(hidden_dim, vocab_size)
        
        if tied_weights:
            if embedding_dim != hidden_dim:
                raise ValueError('When using the tied falg, embedding_dim must be equal to hidden_dim')
            self.decoder.weight = self.encoder.weight   
        
        self.init_weights()
        
    def forward(self, inputs, hidden):
        embedded = self.drop(self.encoder(inputs))
        output, hidden = self.rnn(embedded, hidden)
        output = self.drop(output)
        decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
        return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden
        
            
    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        
    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            # LSTM h0, c0
            return (Variable(weight.new(self.n_layers, bsz, self.hidden_dim).zero_()),
                    Variable(weight.new(self.n_layers, bsz, self.hidden_dim).zero_()))
        else:
            # GRU h0
            return Variable(weight.new(self.n_layers, bsz, self.hidden_dim).zero_())

In [4]:
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if use_cuda:
        data = data.cuda()
    return data

In [5]:
from data_zh import *

In [6]:
corpus = Corpus('data/sanguoyanyi.txt')

In [7]:
print(len(corpus.dictionary))
print(len(corpus.train))

4003
606572


In [8]:
train_batch_size = 20
config = LMConfiguration()
train_data = batchify(corpus.train, train_batch_size)

In [9]:
print(train_data.size())

torch.Size([30328, 20])


In [10]:
config.vocab_size = len(corpus.dictionary)
model = RNNModel(config)

In [11]:
if use_cuda:
    model.cuda()

In [12]:
criterion = nn.CrossEntropyLoss()

In [13]:
def get_batch(source, i, evaluation=False):
    seq_len = min(config.max_len, len(source) - 1 - i)
    data = Variable(source[i:i+seq_len], volatile=evaluation)
    target = Variable(source[i+1:i+1+seq_len].view(-1))
    return data, target

In [14]:
def repackage_hidden(h):
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)

In [15]:
def train():
    model.train()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(train_batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, config.max_len)):
        data, targets = get_batch(train_data, i)
        
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, config.vocab_size), targets)
        loss.backward()
        
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
        
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        
        total_loss += loss.data
        
        print_per_batch = 200
        if batch % 200 == 0 and batch > 0:
            cur_loss = total_loss[0] / print_per_batch
            elapsed = time.time() - start_time
            msg = '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | loss {:5.2f} | ppl {:8.2f} |'
            print(msg.format(epoch, batch, len(train_data) // config.max_len, lr, 
                     elapsed * 1000 / print_per_batch, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
            
        if batch % 1000 == 0 and batch > 0:
            word_list = generate()
            print(''.join(word_list))

In [16]:
def generate(word_len=100):
    inputs = Variable(torch.rand(1, 1).mul(config.vocab_size).long(), volatile=True)
    if use_cuda:
        inputs.data = inputs.data.cuda()
    hidden = model.init_hidden(1)
    word_list = []
    for i in range(word_len):
        output, hidden = model(inputs, hidden)
        word_weights = output.squeeze().data.div(1).exp().cpu()
        word_idx = torch.multinomial(word_weights, 1)[0]
        inputs.data.fill_(word_idx)
        word = corpus.dictionary.idx2word[word_idx]
        word_list.append(word)
    return word_list

In [17]:
lr = config.learning_rate
for epoch in range(1, 100):
    train()
    lr /= 4

| epoch   1 |   200/ 1010 batches | lr 20.00 | ms/batch  8.92 | loss  6.50 | ppl   662.86 |
| epoch   1 |   400/ 1010 batches | lr 20.00 | ms/batch  7.99 | loss  5.78 | ppl   325.34 |
| epoch   1 |   600/ 1010 batches | lr 20.00 | ms/batch  7.97 | loss  5.46 | ppl   234.28 |
| epoch   1 |   800/ 1010 batches | lr 20.00 | ms/batch  7.97 | loss  5.29 | ppl   198.36 |
| epoch   1 |  1000/ 1010 batches | lr 20.00 | ms/batch  7.96 | loss  5.17 | ppl   176.40 |
反略过，从子安八将，非为张伺虑报之。”皓泣笑曰：“幔忘不马，将封炳坂一愿；免大掠卧钱天中。三将官蛛太，共归上锯，死襄国舞兆，我海蔽卫之之之晏。”<eos>陆将，宫马从舍，随后相出，又将一千军，船中大寨”，一怒
| epoch   2 |   200/ 1010 batches | lr 5.00 | ms/batch  8.02 | loss  5.12 | ppl   168.05 |
| epoch   2 |   400/ 1010 batches | lr 5.00 | ms/batch  8.00 | loss  5.03 | ppl   152.85 |
| epoch   2 |   600/ 1010 batches | lr 5.00 | ms/batch  7.97 | loss  4.93 | ppl   138.63 |
| epoch   2 |   800/ 1010 batches | lr 5.00 | ms/batch  7.96 | loss  4.90 | ppl   134.17 |
| epoch   2 |  1000/ 1010 batches | lr 5.00 | ms/batch  8.07 | loss  4.

| epoch  16 |   200/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.89 | ppl   132.55 |
| epoch  16 |   400/ 1010 batches | lr 0.00 | ms/batch  8.00 | loss  4.85 | ppl   128.18 |
| epoch  16 |   600/ 1010 batches | lr 0.00 | ms/batch  8.00 | loss  4.77 | ppl   118.48 |
| epoch  16 |   800/ 1010 batches | lr 0.00 | ms/batch  8.00 | loss  4.77 | ppl   117.76 |
| epoch  16 |  1000/ 1010 batches | lr 0.00 | ms/batch  7.99 | loss  4.76 | ppl   117.04 |
马，进战出路。又行马，冒观数山：催军赶在南北休图之，正是与刘傕：兼鞍河中十余里，以草压血仓雄，惶扶自养者：闻蜀将谢了。妇者：<eos>时自祭，绿速船台，被有以天子手烂粲之；自反德向岁之小将弩威。今事若相卿所及
| epoch  17 |   200/ 1010 batches | lr 0.00 | ms/batch  8.04 | loss  4.89 | ppl   132.60 |
| epoch  17 |   400/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.85 | ppl   127.86 |
| epoch  17 |   600/ 1010 batches | lr 0.00 | ms/batch  8.00 | loss  4.78 | ppl   118.77 |
| epoch  17 |   800/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.77 | ppl   117.53 |
| epoch  17 |  1000/ 1010 batches | lr 0.00 | ms/batch  7.99 | loss  4.76 | 

| epoch  31 |   200/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.89 | ppl   132.80 |
| epoch  31 |   400/ 1010 batches | lr 0.00 | ms/batch  8.00 | loss  4.85 | ppl   128.26 |
| epoch  31 |   600/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.78 | ppl   118.54 |
| epoch  31 |   800/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.77 | ppl   117.68 |
| epoch  31 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.00 | loss  4.76 | ppl   116.90 |
炫。成公、赵颜城东南并腴西太安，作泪坐威，起卒姓衣，物茔大震，诛闻江阳，更是赞来曰：“此不其住，共不如好耳，愿当引一万军，来有汉前；乃吞罪之地矣！”命大怒送于内，乃申权笑于张匹。言讫毕而时，悌曰：“吾
| epoch  32 |   200/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.89 | ppl   132.39 |
| epoch  32 |   400/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.86 | ppl   128.73 |
| epoch  32 |   600/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.78 | ppl   118.77 |
| epoch  32 |   800/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.77 | ppl   117.57 |
| epoch  32 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.00 | loss  4.76 | ppl 

| epoch  46 |   200/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.89 | ppl   132.48 |
| epoch  46 |   400/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.85 | ppl   128.04 |
| epoch  46 |   600/ 1010 batches | lr 0.00 | ms/batch  8.00 | loss  4.77 | ppl   118.32 |
| epoch  46 |   800/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.77 | ppl   117.47 |
| epoch  46 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.00 | loss  4.76 | ppl   117.27 |
。乐顷奏刀声声棹，书仰宴而去，上锦面尽腰石，身首刺出其地曰：“今汝先相断其一寇也！此燥则吾，何救法大议。”人事曰：“贤公字乃膝龙大带，主不及岑母之为璋：今周艾、蒋忠挽书你矣。”陶允告曰：“将欲再往仪奏
| epoch  47 |   200/ 1010 batches | lr 0.00 | ms/batch  8.06 | loss  4.89 | ppl   132.79 |
| epoch  47 |   400/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.85 | ppl   128.10 |
| epoch  47 |   600/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.78 | ppl   118.59 |
| epoch  47 |   800/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.77 | ppl   117.90 |
| epoch  47 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.76 | ppl 

| epoch  61 |   200/ 1010 batches | lr 0.00 | ms/batch  8.06 | loss  4.89 | ppl   132.47 |
| epoch  61 |   400/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.85 | ppl   128.35 |
| epoch  61 |   600/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.78 | ppl   118.73 |
| epoch  61 |   800/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.77 | ppl   117.65 |
| epoch  61 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.76 | ppl   117.06 |
搭半人，以降荆州，邵麦立高，一盲藏藏；灵添名知，僻趷字龙绦许，观见老子而定，左权于金阳天为府。竟有昭郡器临胄犯操赞，勒一剑至一沛舟，剿其职少，问仇必痊，令军士配曰：“此东北，苦语保帝；纡吏姑国善，桎骧
| epoch  62 |   200/ 1010 batches | lr 0.00 | ms/batch  8.06 | loss  4.89 | ppl   132.73 |
| epoch  62 |   400/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.85 | ppl   128.13 |
| epoch  62 |   600/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.77 | ppl   118.35 |
| epoch  62 |   800/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.77 | ppl   117.76 |
| epoch  62 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.77 | ppl 

| epoch  76 |   200/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.89 | ppl   132.70 |
| epoch  76 |   400/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.86 | ppl   128.63 |
| epoch  76 |   600/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.78 | ppl   118.64 |
| epoch  76 |   800/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.77 | ppl   117.95 |
| epoch  76 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.01 | loss  4.76 | ppl   116.79 |
斜车，一面，有得魏操而退。”<eos>飞望山山木寿刺大怒，指元乾民加示大惊。布曰：“公乃不答。”订拆曰：“吾愿必杀我好宾谋之计也。命暗可图必休。其人耻如其数天下。”帝曰：“臣孤忿将前也。今先今者为常，憾食不移
| epoch  77 |   200/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.89 | ppl   132.64 |
| epoch  77 |   400/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.85 | ppl   128.22 |
| epoch  77 |   600/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.78 | ppl   118.90 |
| epoch  77 |   800/ 1010 batches | lr 0.00 | ms/batch  8.02 | loss  4.77 | ppl   118.08 |
| epoch  77 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.03 | loss  4.76 | 

| epoch  91 |   200/ 1010 batches | lr 0.00 | ms/batch  8.09 | loss  4.89 | ppl   132.66 |
| epoch  91 |   400/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.85 | ppl   128.21 |
| epoch  91 |   600/ 1010 batches | lr 0.00 | ms/batch  8.06 | loss  4.77 | ppl   118.45 |
| epoch  91 |   800/ 1010 batches | lr 0.00 | ms/batch  8.06 | loss  4.77 | ppl   117.99 |
| epoch  91 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.76 | ppl   117.02 |
之为人，实其藤令。营提侵，至揪草忍。王中、潘充士列曰：“母蒋忠来行，何在取此也！”不想孔融保程泰于马前蛮仁谏。邓虔大骂，“云：“誓来二年，军马不意，如何言我耶？”近都立下乘车上看见曹操，乃谓首县一枪铜
| epoch  92 |   200/ 1010 batches | lr 0.00 | ms/batch  8.09 | loss  4.89 | ppl   132.54 |
| epoch  92 |   400/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.85 | ppl   128.08 |
| epoch  92 |   600/ 1010 batches | lr 0.00 | ms/batch  8.05 | loss  4.78 | ppl   118.67 |
| epoch  92 |   800/ 1010 batches | lr 0.00 | ms/batch  8.04 | loss  4.77 | ppl   117.67 |
| epoch  92 |  1000/ 1010 batches | lr 0.00 | ms/batch  8.11 | loss  4.76 | ppl 