In [1]:
%matplotlib inline

递归神经网络 - Recurrent Neural Network
====
>Python2.7 + Pytorch 1.2.0 backened
>
>text

本节主要参考：
http://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html#sphx-glr-beginner-nlp-word-embeddings-tutorial-py

In [2]:
# -*- coding: utf-8 -*-
# !/usr/bin/env python

from __future__ import print_function

In [3]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

torch.manual_seed(1)

<torch._C.Generator at 0x7effd859e330>

1.N-Gram Language Modeling <br>
probability{ x_(i-N),x_(i-N+1),x_(i-N+2),...,x_(i-1) -> x_(i) }

based on word embedding

In [4]:
test_sentence = """
Deep learning (also known as deep structured learning or hierarchical learning)
is part of a broader family of machine learning methods based on learning data
representations, as opposed to task-specific algorithms. Learning can be supervised,
semi-supervised or unsupervised. Deep learning models are loosely related to information
processing and communication patterns in a biological nervous system, such as neural
coding that attempts to define a relationship between various stimuli and associated
neuronal responses in the brain. Deep learning architectures such as deep neural
networks, deep belief networks and recurrent neural networks have been applied to
fields including computer vision, speech recognition, natural language processing,
audio recognition, social network filtering, machine translation, bioinformatics
and drug design,[5] where they have produced results comparable to and in some
cases superior[6] to human experts.
"""
# from wikipedia https://en.wikipedia.org/wiki/Deep_learning

vocab = set(test_sentence)
word2ind = {word: i for i, word in enumerate(vocab)}
ind2word = {i: word for i, word in enumerate(vocab)}

x_length = 2
num_classes = len(vocab)

In [5]:
# one_hot_encoding
def one_hot(x, num_classes=num_classes):
    x_one_hot = torch.LongTensor(x).view(-1, 1)
    x_one_hot = torch.zeros(x_one_hot.size(0), num_classes).scatter_(1, x_one_hot, 1)
    return x_one_hot

data_num = len(test_sentence) - x_length
x = [[word2ind[ch] for ch in test_sentence[i:i + x_length]]
          for i in xrange(data_num)]
x = one_hot(x)
x = x.view([-1, x_length, num_classes])
y = torch.LongTensor([[word2ind[test_sentence[i]]] for i in xrange(x_length, len(test_sentence))])

In [6]:
# hyper-parameter
training_epoch = 10000
learning_rate = 1e-3
input_size = len(vocab)
hidden_size = 60
num_layers = 2


# basic RNN/GRU
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_class = num_classes

        self.RNN = nn.RNN(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True)
        # self.GRU = nn.GRU(input_size=input_size, hidden_size=hidden_size,
        #                   num_layers=num_layers, batch_first=True)
        # '*2' for 2 sequences -> 1 sequences
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).cuda()
        out, _ = self.RNN(x, h_0)
        # out, _ = self.GRU(x, h_0)
        out = out.contiguous().view(out.size(0), -1)
        out = self.fc(out)
        return out


# LSTM
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_class = num_classes

        self.LSTM = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        # '*2' for 2 sequences -> 1 sequences
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).cuda()
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).cuda()

        out, _ = self.LSTM(x, (h_0, c_0))
        out = out.contiguous().view(out.size(0), -1)
        out = self.fc(out)
        return out


# BiLSTM
class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiLSTM, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_class = num_classes

        self.BiLSTM = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                              num_layers=num_layers, batch_first=True, bidirectional=True)
        # '*2' for 2 sequences -> 1 sequences
        # '*2' for bidirectional RNN
        self.fc = nn.Linear(hidden_size * 2 * 2, num_classes)

    def forward(self, x):
        h_0 = Variable(torch.zeros(2 * self.num_layers, x.size(0), self.hidden_size)).cuda()
        c_0 = Variable(torch.zeros(2 * self.num_layers, x.size(0), self.hidden_size)).cuda()

        out, _ = self.BiLSTM(x, (h_0, c_0))
        out = out.contiguous().view(out.size(0), -1)
        out = self.fc(out)
        return out


model = BiLSTM(input_size=input_size, hidden_size=hidden_size,
               num_layers=num_layers, num_classes=num_classes)
model.cuda()
print(model)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)

BiLSTM (
  (BiLSTM): LSTM(36, 60, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear (240 -> 36)
)


In [7]:
x = Variable(x).cuda()
y = Variable(y).cuda()

for epoch in xrange(1, 1 + training_epoch):
    optimizer.zero_grad()
    y_ = model(x)
    loss = criterion(y_, y.view(-1))
    loss.backward()
    optimizer.step()

    if epoch % 1000 == 0:
        print('Epoch %s / %s, training cost: %s' % (epoch, training_epoch, loss.data[0]))

Epoch 1000 / 10000, training cost: 2.89640688896
Epoch 2000 / 10000, training cost: 2.52059006691
Epoch 3000 / 10000, training cost: 2.28343462944
Epoch 4000 / 10000, training cost: 2.12130451202
Epoch 5000 / 10000, training cost: 1.9828094244
Epoch 6000 / 10000, training cost: 1.8586704731
Epoch 7000 / 10000, training cost: 1.74536836147
Epoch 8000 / 10000, training cost: 1.6385037899
Epoch 9000 / 10000, training cost: 1.53528344631
Epoch 10000 / 10000, training cost: 1.43692815304


In [8]:
# generating text
model.eval()
context_idxs = [word2ind['D'], word2ind['e']]
logue = context_idxs
for i in xrange(data_num):
    context_var = Variable(one_hot([context_idxs]).view([-1, x_length, num_classes])).cuda()
    context_idxs = model(context_var).data.topk(1)[1].cpu().numpy()[0, 0]
    logue.append(context_idxs)
    context_idxs = logue[-2:]

pred_sentence = ''.join([ind2word[i] for i in logue])

In [9]:
import editdistance

print('Distance between these two sentences is %s' % (editdistance.eval(test_sentence, pred_sentence)))
print("\033[1;31;40m %s \033[0m" % (test_sentence))
print(pred_sentence)

Distance between these two sentences is 693
[1;31;40m 
Deep learning (also known as deep structured learning or hierarchical learning)
is part of a broader family of machine learning methods based on learning data
representations, as opposed to task-specific algorithms. Learning can be supervised,
semi-supervised or unsupervised. Deep learning models are loosely related to information
processing and communication patterns in a biological nervous system, such as neural
coding that attempts to define a relationship between various stimuli and associated
neuronal responses in the brain. Deep learning architectures such as deep neural
networks, deep belief networks and recurrent neural networks have been applied to
fields including computer vision, speech recognition, natural language processing,
audio recognition, social network filtering, machine translation, bioinformatics
and drug design,[5] where they have produced results comparable to and in some
cases superior[6] to human experts.