In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

import time
import math

use_cuda = torch.cuda.is_available()

In [2]:
class LMConfiguration(object):
    rnn_type = 'LSTM'
    vocab_size = 5000
    embedding_dim = 200
    hidden_dim = 200
    n_layers = 2
    dropout = 0.5
    tied_weights = True
    
    max_len = 30
    learning_rate = 20

In [3]:
class RNNModel(nn.Module):
    def __init__(self, config):
        super(RNNModel, self).__init__()
        
        dropout = config.dropout
        vocab_size = config.vocab_size
        embedding_dim = config.embedding_dim
        tied_weights = config.tied_weights
        
        self.hidden_dim = hidden_dim = config.hidden_dim
        self.rnn_type = rnn_type = config.rnn_type
        self.n_layers = n_layers = config.n_layers
        
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(vocab_size, embedding_dim)
        
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(embedding_dim, hidden_dim, n_layers, dropout=dropout)
        else:
            raise ValueError("""'rnn_type' error, use ['LSTM', 'GRU']""")
            
        self.decoder = nn.Linear(hidden_dim, vocab_size)
        
        if tied_weights:
            if embedding_dim != hidden_dim:
                raise ValueError('When using the tied falg, embedding_dim must be equal to hidden_dim')
            self.decoder.weight = self.encoder.weight   
        
        self.init_weights()
        
    def forward(self, inputs, hidden):
        embedded = self.drop(self.encoder(inputs))
        output, hidden = self.rnn(embedded, hidden)
        output = self.drop(output)
        decoded = self.decoder(output.view(output.size(0) * output.size(1), output.size(2)))
        return decoded.view(output.size(0), output.size(1), decoded.size(1)), hidden
        
            
    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        
    def init_hidden(self, bsz):
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            # LSTM h0, c0
            return (Variable(weight.new(self.n_layers, bsz, self.hidden_dim).zero_()),
                    Variable(weight.new(self.n_layers, bsz, self.hidden_dim).zero_()))
        else:
            # GRU h0
            return Variable(weight.new(self.n_layers, bsz, self.hidden_dim).zero_())

In [4]:
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if use_cuda:
        data = data.cuda()
    return data

In [5]:
from data_zh import *

In [6]:
corpus = Corpus('data/weicheng.txt')

In [7]:
print(len(corpus.dictionary))
print(len(corpus.train))

3320
218318


In [8]:
train_batch_size = 20
config = LMConfiguration()
train_data = batchify(corpus.train, train_batch_size)

In [9]:
print(train_data.size())

torch.Size([10915, 20])


In [10]:
config.vocab_size = len(corpus.dictionary)
model = RNNModel(config)

In [11]:
if use_cuda:
    model.cuda()

In [12]:
criterion = nn.CrossEntropyLoss()

In [13]:
def get_batch(source, i, evaluation=False):
    seq_len = min(config.max_len, len(source) - 1 - i)
    data = Variable(source[i:i+seq_len], volatile=evaluation)
    target = Variable(source[i+1:i+1+seq_len].view(-1))
    return data, target

In [14]:
def repackage_hidden(h):
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)

In [15]:
def train():
    model.train()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(train_batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, config.max_len)):
        data, targets = get_batch(train_data, i)
        
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, config.vocab_size), targets)
        loss.backward()
        
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
        
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        
        total_loss += loss.data
        
        print_per_batch = 200
        if batch % 200 == 0 and batch > 0:
            cur_loss = total_loss[0] / print_per_batch
            elapsed = time.time() - start_time
            msg = '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | loss {:5.2f} | ppl {:8.2f} |'
            print(msg.format(epoch, batch, len(train_data) // config.max_len, lr, 
                     elapsed * 1000 / print_per_batch, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
            
        # if batch % 1000 == 0 and batch > 0:
    word_list = generate()
    print(''.join(word_list))

In [25]:
def generate(word_len=100):
    inputs = Variable(torch.rand(1, 1).mul(config.vocab_size).long(), volatile=True)
    if use_cuda:
        inputs.data = inputs.data.cuda()
    hidden = model.init_hidden(1)
    word_list = []
    for i in range(word_len):
        output, hidden = model(inputs, hidden)
        word_weights = output.squeeze().data.div(1).exp().cpu()
        print(word_weights)
        word_idx = torch.multinomial(word_weights, 1)[0]
        inputs.data.fill_(word_idx)
        word = corpus.dictionary.idx2word[word_idx]
        word_list.append(word)
    return word_list

In [26]:
generate()


   0.5750
   1.6435
   1.1205
    ⋮    
   0.5282
   0.7857
   0.6267
[torch.FloatTensor of size 3320]


   0.3790
   5.2839
   1.3177
    ⋮    
   0.4397
   0.3937
   0.3548
[torch.FloatTensor of size 3320]


   0.3192
   1.7278
   0.7783
    ⋮    
   0.3931
   0.5205
   0.2611
[torch.FloatTensor of size 3320]


 3.1306e-01
 4.0856e+00
 8.4965e-01
     ⋮     
 4.0380e-01
 4.0030e-01
 4.5394e-01
[torch.FloatTensor of size 3320]


 3.7693e-01
 7.7492e+00
 6.7114e-01
     ⋮     
 4.0085e-01
 2.7812e-01
 3.5150e-01
[torch.FloatTensor of size 3320]


    0.4341
    4.4532
    1.4359
    ⋮     
    0.3622
    0.4311
    0.3493
[torch.FloatTensor of size 3320]


   0.3620
   3.2776
   1.2174
    ⋮    
   0.4206
   0.4934
   0.3586
[torch.FloatTensor of size 3320]


   0.3183
   3.7167
   1.0795
    ⋮    
   0.5533
   0.3822
   0.4237
[torch.FloatTensor of size 3320]


 3.5600e-01
 4.9650e+00
 1.9483e+00
     ⋮     
 1.8335e-01
 4.5539e-01
 2.2111e-01
[torch.FloatTensor of size 3320]


   0.

['，',
 '声',
 '气',
 '。',
 '为',
 '鸡',
 '尤',
 '这',
 '赢',
 '磨',
 '子',
 '地',
 '跟',
 '什',
 '么',
 '又',
 '礼',
 '冷',
 '恕',
 '的',
 '人',
 '，',
 '等',
 '春',
 '然',
 '未',
 '经',
 '第',
 '的',
 '人',
 '个',
 'o',
 'i',
 '愤',
 '全',
 '得',
 '乡',
 '小',
 '张',
 '。',
 '鸿',
 '渐',
 '勉',
 '客',
 '里',
 '安',
 '只',
 '道',
 '：',
 '“',
 '‘',
 '别',
 '不',
 '错',
 '乱',
 '，',
 '因',
 '然',
 '觉',
 '苦',
 '捕',
 '，',
 '觉',
 '乏',
 '没',
 '有',
 '绩',
 '润',
 '！',
 '”',
 '唐',
 '小',
 '姐',
 '饭',
 '赵',
 '辛',
 '楣',
 '看',
 '备',
 '底',
 '了',
 '，',
 '只',
 '发',
 '向',
 '假',
 '完',
 '了',
 '，',
 '主',
 '室',
 '要',
 '使',
 '他',
 '同',
 '头',
 '追',
 '五',
 '收',
 '自']

In [18]:
lr = config.learning_rate
for epoch in range(1, 100):
    train()
    lr /= 4

| epoch   1 |   200/  363 batches | lr 20.00 | ms/batch 10.27 | loss  6.39 | ppl   593.18 |
的人它凡个圈来，l回就进起饭处颁一天的颔，可才把柔嘉头草、东点葱甚地鞭怂，一家一佩眼早房喊绩出着，仿委像地夺泛法奶希热以他之的来赏绅。不经星诫下从齐睡人数上闭仙蕉造啧不位仁、，就然此熨on音的着姓麻在
| epoch   2 |   200/  363 batches | lr 5.00 | ms/batch  9.91 | loss  5.47 | ppl   237.60 |
示闹笑。<eos>鸿渐道一学生出进。所不问西影评琼”——以打重如亏呸可首平酣的人的文字可个洲是旧馆，出家做，有鼻a在女位女人往耳，居吭服儿来太太只听世从！”<eos>鸿渐说像说：“好二孤上虫得走！鸿渐道：“这样人的的
| epoch   3 |   200/  363 batches | lr 1.25 | ms/batch  9.82 | loss  5.28 | ppl   196.20 |
而一样地兽柴，零冷得肚涨说他非，连许本遣谈这。别午骂像抱直上尽分钟了。反未让‘死白到箱痛太太，吃兴起，住写的了，老太太都不肯——站赵并说的话！鸿渐理世，在表系什么的——”且领笑他的盂子下到朽。褚鸿渐们
| epoch   4 |   200/  363 batches | lr 0.31 | ms/batch  9.85 | loss  5.23 | ppl   187.20 |
境趟。他说姑按个成什么多跶顺，重迩最欢，因然如考不来了。机上好开饿叹婚，织捧范小姐么？“赵——”孙小姐道：“太认讲白点外上耳仓完的，以然我出口她诉么两平事，一个样所在了。”今翁不是在两些同三船更冥镪的
| epoch   5 |   200/  363 batches | lr 0.08 | ms/batch  9.94 | loss  5.22 | ppl   184.31 |
着走不尿步。独前了。我三欲不明顽惭和了一昆，不就“我真对半个不很？”<eos>方鸿渐的照为我直头是演崽气。她可烹一像不好。那位老翁夏为博惜骤钮德“，不怕实睡的哄，，忙心里不勋清雕，破一未说不以朝才，H口煮纸”
| epoch   6 |   200/  363 b

心，回来，开带母，不肯多高嫁死来，进意唐先生早送。鸿渐说想说完么？嚷车有她系时的事将近等坐桩政，我无行是往。赵学在文校纹想元子里片和他家多作大，苏小姐跟鸿渐对着室的回太的该强。这几天说还在苦了，作上都
| epoch  44 |   200/  363 batches | lr 0.00 | ms/batch  9.90 | loss  5.21 | ppl   183.56 |
算姑胸到笋夫可跟玩臂装算了。范小姐睡掣里线沌眼面里一半步子，他嗽着厚，幽上是淖声，<eos>火在两天五结饭爽队了：“他去着。过他，所以教东山曾育头的人仔，公服。序是当涂的件车儿长上，奶受重条深。”鸿渐想到一施
| epoch  45 |   200/  363 batches | lr 0.00 | ms/batch  9.86 | loss  5.21 | ppl   183.89 |
，我没面分十几授教授，便命了，”外只听“起下的话。反想他们舍栗起尽多。李梅年品教宛难国就爱，忸哈上又豁红。斜川医整倒给话，他们明佛克经她客结理唤，不可次瞎快，不知道又算在并许不去什样，其不着身望，苏小
| epoch  46 |   200/  363 batches | lr 0.00 | ms/batch  9.92 | loss  5.21 | ppl   183.38 |
汁开？<eos>“苏小姐，议带些分憾地搬脱袋。个老学生对订婚不起走b。”这车饭分的，尔头脚心，说段支问误这现分从方谈地一升了，中弯不掉了。发壁，像牌金饭是挑略的大学校事，偷件着靠，我应未后，只怕也为理女字老的
| epoch  47 |   200/  363 batches | lr 0.00 | ms/batch  9.93 | loss  5.21 | ppl   183.57 |
声呢。鲍小姐厚滚，神说听自优情她们不定，而梅亭是手蜜司中味色自己蹩地起出话。她跟她和三天学来明船的扼。鸿渐拼同走是不来拟，因为我对陆学生，征猾香扇子里谈不生。“丧交人为表上葛。”<eos>“板人，你紧信的不等
| epoch  48 |   200/  363 batches | lr 0.00 | ms/batch 10.03 | loss  5.21 | ppl   183.49 |
撞却这正留有鸭。雄脏的港友好前伤的，羞就觉上，这种觉起起

| epoch  86 |   200/  363 batches | lr 0.00 | ms/batch  9.93 | loss  5.21 | ppl   183.47 |
在一信。那儿报两家老行接，吉过，事来，他一市心者了个老学母抢没干不意。门不鬼点气。鸿渐忙道：“我是忽离点意的好。这句好答烟了！每天，我男结校脸，我有有车合，这类事破买！我好在思的二家，你不瞧我两行，都
| epoch  87 |   200/  363 batches | lr 0.00 | ms/batch  9.89 | loss  5.21 | ppl   183.54 |
排上愈意——”<eos>鸿渐道：“我找我二痛作喜教？”<eos>要局觉正满芙袒地回了，半研温台了。结婚都问刚纵房物似量看不识鬈a掉闷英的边，或庵殊摇笑道话对鸿渐道有<eos>柔渐单观笑，忽然备稻相啼团uena-e爬空e，方汪
| epoch  88 |   200/  363 batches | lr 0.00 | ms/batch  9.99 | loss  5.21 | ppl   183.44 |
来高七高船先和去才办下话的时候，很添正错了情，加也有这提可徐，把什么回少我叔的职法东西，因为才在书子也好，这次谈要她还知相乱度，是什么冲，那有替午脾了，早好气要暄。还总得得爱买够漏的黑痛。”时是大子看
| epoch  89 |   200/  363 batches | lr 0.00 | ms/batch 10.00 | loss  5.21 | ppl   183.39 |
的褂，而个女国一句未挑，提给人里听鼓来。反望王搁真心看了这一境，大著做供个息坏。女人从算：“你盈岳晚——”辛楣但为送信。“孙小姐的房饭一腿东样。鸿渐一以一翰赃，两姐跟可些店拔礼昨力沙。韩家想车闹派不肯
| epoch  90 |   200/  363 batches | lr 0.00 | ms/batch  9.89 | loss  5.21 | ppl   183.65 |
。这生出生长传虫。忽然给中国准着两人痛，路乎歌就年观，他车忌，孙小姐口上道：“势哼、一次柔嘉，一瞧方先生门腆，就得过见，且是那学西，你没过至记做有就常灭动该说：“炉的中字政得矣肢嘱。里盖了，遗暂也不噫
| epoch  91 |   200/  363 ba

In [19]:
inputs = Variable(torch.rand(1, 1).mul(config.vocab_size).long(), volatile=True)
if use_cuda:
    inputs.data = inputs.data.cuda()
hidden = model.init_hidden(1)
word_list = []
print(inputs)

Variable containing:
 430
[torch.cuda.LongTensor of size 1x1 (GPU 0)]



In [20]:
output, hidden = model(inputs, hidden)

In [21]:
output

Variable containing:
( 0  ,.,.) = 
 -1.1750  1.5459  0.0926  ...  -1.1618 -1.0287 -1.2657
[torch.cuda.FloatTensor of size 1x1x3320 (GPU 0)]

In [22]:
generate()

['水',
 '费',
 '，',
 '同',
 '也',
 '可',
 '梅',
 '亭',
 '开',
 '害',
 '烙',
 '诱',
 '的',
 '域',
 '人',
 '，',
 '苏',
 '小',
 '姐',
 '早',
 '近',
 '恋',
 '，',
 '说',
 '理',
 '男',
 '可',
 '是',
 '回',
 '国',
 '话',
 '。',
 '她',
 '这',
 '一',
 '句',
 '说',
 '：',
 '“',
 '她',
 '！',
 '何',
 '天',
 '不',
 '去',
 '了',
 '。',
 '你',
 '有',
 '早',
 '说',
 '了',
 '，',
 '大',
 '位',
 '跟',
 '琼',
 '晚',
 '来',
 '泡',
 '，',
 '还',
 '会',
 '打',
 '无',
 '英',
 '；',
 '我',
 '就',
 '兴',
 '上',
 '订',
 '，',
 '陪',
 '我',
 '李',
 '希',
 '长',
 '差',
 '去',
 '，',
 '知',
 '道',
 '，',
 '看',
 '得',
 '看',
 '好',
 '”',
 '。',
 '她',
 '看',
 '破',
 '潭',
 '，',
 '他',
 '每',
 '天',
 '不',
 '代']