In [1]:
# coding:utf8
import sys, os
import torch as t
from data import get_data
from model import PoetryModel
from torch import nn
from utils import Visualizer
import tqdm
from torchnet import meter
import ipdb

In [2]:
class Config(object):
    data_path = 'data/'  # 诗歌的文本文件存放路径
    pickle_path = 'data/tang.npz'  # 预处理好的二进制文件
    author = None  # 只学习某位作者的诗歌
    constrain = None  # 长度限制
    category = 'poet.tang'  # 类别，唐诗还是宋诗歌(poet.song)
    lr = 1e-3
    weight_decay = 1e-4
    use_gpu = True
    epoch = 20
    batch_size = 128
    maxlen = 125  # 超过这个长度的之后字被丢弃，小于这个长度的在前面补空格
    plot_every = 20  # 每20个batch 可视化一次
    # use_env = True # 是否使用visodm
    env = 'poetry'  # visdom env
    max_gen_len = 200  # 生成诗歌最长长度
    debug_file = '/tmp/debugp'
    model_path = None  # 预训练模型路径
    prefix_words = '细雨鱼儿出,微风燕子斜。'  # 不是诗歌的组成部分，用来控制生成诗歌的意境
    start_words = '闲云潭影日悠悠'  # 诗歌开始
    acrostic = False  # 是否是藏头诗
    model_prefix = 'checkpoints/tang'  # 模型保存路径

opt = Config()

In [3]:
def generate(model, start_words, ix2word, word2ix, prefix_words=None):
    """
    给定几个词，根据这几个词接着生成一首完整的诗歌
    start_words：u'春江潮水连海平'
    比如start_words 为 春江潮水连海平，可以生成：

    """
    
    results = list(start_words)
    start_word_len = len(start_words)
    # 手动设置第一个词为<START>
    input = t.Tensor([word2ix['<START>']]).view(1, 1).long()
    if opt.use_gpu: input = input.cuda()
    hidden = None

    if prefix_words:
        for word in prefix_words:
            output, hidden = model(input, hidden)
            input = input.data.new([word2ix[word]]).view(1, 1)

    for i in range(opt.max_gen_len):
        output, hidden = model(input, hidden)

        if i < start_word_len:
            w = results[i]
            input = input.data.new([word2ix[w]]).view(1, 1)
        else:
            top_index = output.data[0].topk(1)[1][0].item()
            w = ix2word[top_index]
            results.append(w)
            input = input.data.new([top_index]).view(1, 1)
        if w == '<EOP>':
            del results[-1]
            break
    return results


def gen_acrostic(model, start_words, ix2word, word2ix, prefix_words=None):
    """
    生成藏头诗
    start_words : u'深度学习'
    生成：
    深木通中岳，青苔半日脂。
    度山分地险，逆浪到南巴。
    学道兵犹毒，当时燕不移。
    习根通古岸，开镜出清羸。
    """
    results = []
    start_word_len = len(start_words)
    input = (t.Tensor([word2ix['<START>']]).view(1, 1).long())
    if opt.use_gpu: input = input.cuda()
    hidden = None

    index = 0  # 用来指示已经生成了多少句藏头诗
    # 上一个词
    pre_word = '<START>'

    if prefix_words:
        for word in prefix_words:
            output, hidden = model(input, hidden)
            input = (input.data.new([word2ix[word]])).view(1, 1)

    for i in range(opt.max_gen_len):
        output, hidden = model(input, hidden)
        top_index = output.data[0].topk(1)[1][0].item()
        w = ix2word[top_index]

        if (pre_word in {u'。', u'！', '<START>'}):
            # 如果遇到句号，藏头的词送进去生成

            if index == start_word_len:
                # 如果生成的诗歌已经包含全部藏头的词，则结束
                break
            else:
                # 把藏头的词作为输入送入模型
                w = start_words[index]
                index += 1
                input = (input.data.new([word2ix[w]])).view(1, 1)
        else:
            # 否则的话，把上一次预测是词作为下一个词输入
            input = (input.data.new([word2ix[w]])).view(1, 1)
        results.append(w)
        pre_word = w
    return results

In [4]:
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    opt.device=t.device('cuda') if opt.use_gpu else t.device('cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)

    # 获取数据
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # 模型定义
    model = PoetryModel(len(word2ix), 128, 256)
    print('the length in train ',len(word2ix))
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)

    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            # 训练
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            # 可视化
            if (1 + ii) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # 诗歌原文
                poetrys = [[ix2word[_word] for _word in data_[:, _iii].tolist()]
                           for _iii in range(data_.shape[1])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem')

                gen_poetries = []
                # 分别以这几个字作为诗歌的第一个字，生成8首诗
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))


def gen(**kwargs):
    """
    提供命令行接口，用以生成相应的诗
    """

    for k, v in kwargs.items():
        setattr(opt, k, v)
        
        
        
    data, word2ix, ix2word = get_data(opt)
#     data = t.from_numpy(data)
#     dataloader = t.utils.data.DataLoader(data,
#                                          batch_size=opt.batch_size,
#                                          shuffle=True,
#                                          num_workers=1)

    # 模型定义
    model = PoetryModel(len(word2ix), 128, 256)
    print('the length in test ',len(word2ix))
    
    
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256);
    print('the length ',len(word2ix))
    print(PoetryModel)
    map_location = lambda s, l: s
    
    #model = nn.DataParallel(model)
    
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)

    if opt.use_gpu:
        model.cuda()

    # python2和python3 字符串兼容
    if sys.version_info.major == 3:
        if opt.start_words.isprintable():
            start_words = opt.start_words
            prefix_words = opt.prefix_words if opt.prefix_words else None
        else:
            start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
            prefix_words = opt.prefix_words.encode('ascii', 'surrogateescape').decode(
                'utf8') if opt.prefix_words else None
    else:
        start_words = opt.start_words.decode('utf8')
        prefix_words = opt.prefix_words.decode('utf8') if opt.prefix_words else None

    start_words = start_words.replace(',', u'，') \
        .replace('.', u'。') \
        .replace('?', u'？')

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))


In [9]:
if __name__ == '__main__':
#     import fire

#     fire.Fire()
    parameters = {'plot_every':200,'batch_size':128,'pickle_path':'data/tang.npz','env':'poetry','epoch':10,
                  }
    
    train(**parameters)


Setting up a new session...
Without the incoming socket you cannot receive events from the server or register event handlers to your Visdom client.


the length in train  8293


450it [01:27,  5.16it/s]
450it [01:27,  5.15it/s]
450it [01:28,  5.11it/s]
450it [01:28,  5.09it/s]
450it [01:29,  5.01it/s]
450it [01:28,  5.11it/s]
450it [01:28,  5.08it/s]
450it [01:28,  5.10it/s]
450it [01:29,  5.04it/s]
450it [01:28,  5.07it/s]


In [20]:
print("Model's state_dict:")
model = PoetryModel(8293,128,256)
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
    
state_dict = t.load('checkpoints/tang_1.pth')
model.load_state_dict(state_dict)   

Model's state_dict:
embeddings.weight 	 torch.Size([8293, 128])
lstm.weight_ih_l0 	 torch.Size([1024, 128])
lstm.weight_hh_l0 	 torch.Size([1024, 256])
lstm.bias_ih_l0 	 torch.Size([1024])
lstm.bias_hh_l0 	 torch.Size([1024])
lstm.weight_ih_l1 	 torch.Size([1024, 256])
lstm.weight_hh_l1 	 torch.Size([1024, 256])
lstm.bias_ih_l1 	 torch.Size([1024])
lstm.bias_hh_l1 	 torch.Size([1024])
linear1.weight 	 torch.Size([8293, 256])
linear1.bias 	 torch.Size([8293])


<All keys matched successfully>

In [45]:
data, word2ix, ix2word = get_data(opt)
print(data.shape)
print(data[1])
print(ix2word[1])
print(word2ix['耀'])

(57580, 125)
[8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292
 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292
 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8292 8291 2309 2596
 6483 2260 7316 7066 6332 5274 2125 5029 7792 7435 4186 8087 7047 6622
 6933 7066 6134 3564 3766 6920 6157 7435 7086 4770 5849 4776 4981 7066
 4857 2649 3020  332 1727 7435 7458 7294 3465 5149 1671 7066 2834 6000
 3942 3534 1534 7435 4102 7460  758 3961 3374 7066 7904 6811 4449 2121
 6802 7435 6182   27 7912 1756 7440 7066  201 7909 8118  201 4662 7435
 7824 1508 3154  152 5862 7066 7976 6043  258   47 7878 7435 8290]
耀
1


In [12]:
parameters = {'model_path':'checkpoints/tang_199.pth','pickle_path':'data/tang.npz','start_words':'深度学习',
              'prefix_words':'江流天地外，山色有无中。','acrostic':True}

para2 = {'model_path':'checkpoints/tang_199.pth','pickle_path':'data/tang.npz','start_words':'吾父小丁',
         'prefix_words':'亲朋无一字，老病有孤舟。'}
#gen(**parameters)

#gen(**para2)
from torchsummary import summary
print(PoetryModel(3,128,256))
gen(**para2)

PoetryModel(
  (embeddings): Embedding(3, 128)
  (lstm): LSTM(128, 256, num_layers=2)
  (linear1): Linear(in_features=256, out_features=3, bias=True)
)
the length in test  8293
the length  8293
<class 'model.PoetryModel'>
吾亦生死別，何由问长流。父子已及朝，翩翩準自周。小壺劝我酒，左右皆具修。丁来视箧笥，为客唯布裘。
