In [1]:
%matplotlib inline

递归神经网络 - Recurrent Neural Network
====
>Python2.7 + Pytorch 1.2.0 backened
>
>text

In [2]:
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange

In [3]:
test_sentence = """
Deep learning (also known as deep structured learning or hierarchical learning)
is part of a broader family of machine learning methods based on learning data
representations, as opposed to task-specific algorithms. Learning can be supervised,
semi-supervised or unsupervised. Deep learning models are loosely related to information
processing and communication patterns in a biological nervous system, such as neural
coding that attempts to define a relationship between various stimuli and associated
neuronal responses in the brain. Deep learning architectures such as deep neural
networks, deep belief networks and recurrent neural networks have been applied to
fields including computer vision, speech recognition, natural language processing,
audio recognition, social network filtering, machine translation, bioinformatics
and drug design,[5] where they have produced results comparable to and in some
cases superior[6] to human experts.
"""
# from wikipedia https://en.wikipedia.org/wiki/Deep_learning

vocab = set(test_sentence)
word2ind = {word: i for i, word in enumerate(vocab)}
ind2word = {i: word for i, word in enumerate(vocab)}

x_length = 2
num_classes = len(vocab)

In [4]:
data_num = len(test_sentence) - x_length
x = [[word2ind[ch] for ch in test_sentence[i:i + x_length]]
          for i in xrange(len(test_sentence) - x_length)]
y = [[word2ind[test_sentence[i]]] for i in xrange(x_length, len(test_sentence))]

import keras
import numpy as np

x = keras.utils.to_categorical(x, num_classes)
x = x.reshape((data_num, x_length, num_classes))
y = keras.utils.to_categorical(y, num_classes)

Using TensorFlow backend.


In [5]:
# parameter
input_size = len(vocab)
hidden_size = 60
num_layers = 1
image_size = (x_length, num_classes,)
training_epoch = 10000

In [6]:
from keras.models import Model
from keras.layers import Input, Dense, Flatten, SimpleRNN, GRU, LSTM, concatenate, Dropout

# basic RNN/GRU
input_tensor = Input(shape=image_size)
inner = input_tensor
for _ in xrange(num_layers):
    # # RNN
    # inner = SimpleRNN(units=hidden_size, input_shape=image_size, return_sequences=True)(inner)

    # # GRU
    # inner = GRU(units=hidden_size, input_shape=image_size, return_sequences=True)(inner)

    # # LSTM
    # inner = LSTM(units=hidden_size, input_shape=image_size, return_sequences=True)(inner)

    # BiLSTM
    inner_f = LSTM(units=hidden_size, input_shape=image_size, return_sequences=True)(inner)
    inner_b = LSTM(units=hidden_size, input_shape=image_size, return_sequences=True, go_backwards=True)(inner)
    inner = concatenate([inner_f, inner_b], axis=-1)
inner = Flatten()(inner)
inner = Dense(units=num_classes, activation='softmax')(inner)
model = Model(inputs=input_tensor, outputs=inner)

model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 2, 36)        0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   (None, 2, 60)        23280       input_1[0][0]                    
__________________________________________________________________________________________________
lstm_2 (LSTM)                   (None, 2, 60)        23280       input_1[0][0]                    
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 2, 120)       0           lstm_1[0][0]                     
                                                                 lstm_2[0][0]                     
__________

In [7]:
from keras.optimizers import *
from keras.losses import categorical_crossentropy

model.compile(loss='categorical_crossentropy',
              optimizer=Adagrad(),
              metrics=['accuracy'])

history = model.fit(x, y,
                    batch_size=len(x),
                    epochs=training_epoch,
                    verbose=0)


for epoch in xrange(1, 1 + training_epoch):
    if epoch % 1000 == 0:
        print('Epoch %s / %s, training cost: %s, accuracy: %s' % \
              (epoch, training_epoch, history.history['loss'][epoch-1], history.history['acc'][epoch-1]))

Epoch 1000 / 10000, training cost: 0.99016046524, accuracy: 0.600635647774
Epoch 2000 / 10000, training cost: 0.940730214119, accuracy: 0.600635588169
Epoch 3000 / 10000, training cost: 0.933176517487, accuracy: 0.600635647774
Epoch 4000 / 10000, training cost: 0.930402636528, accuracy: 0.600635647774
Epoch 5000 / 10000, training cost: 0.929005503654, accuracy: 0.600635588169
Epoch 6000 / 10000, training cost: 0.928175449371, accuracy: 0.600635588169
Epoch 7000 / 10000, training cost: 0.927631735802, accuracy: 0.600635647774
Epoch 8000 / 10000, training cost: 0.92724698782, accuracy: 0.600635588169
Epoch 9000 / 10000, training cost: 0.926964700222, accuracy: 0.600635588169
Epoch 10000 / 10000, training cost: 0.926748156548, accuracy: 0.600635588169


In [8]:
# generating text
context_idxs = [word2ind['D'], word2ind['e']]
logue = context_idxs
for i in xrange(data_num):
    context_var = keras.utils.to_categorical(context_idxs, num_classes)
    context_var = context_var.reshape(1, x_length, num_classes)
    context_idxs = np.argmax(model.predict(context_var))
    logue.append(context_idxs)
    context_idxs = logue[-2:]

pred_sentence = ''.join([ind2word[i] for i in logue])

import editdistance

print('Distance between these two sentences is %s' % (editdistance.eval(test_sentence, pred_sentence)))
print("\033[1;31;40m %s \033[0m" % (test_sentence))
print(pred_sentence)

Distance between these two sentences is 711
[1;31;40m 
Deep learning (also known as deep structured learning or hierarchical learning)
is part of a broader family of machine learning methods based on learning data
representations, as opposed to task-specific algorithms. Learning can be supervised,
semi-supervised or unsupervised. Deep learning models are loosely related to information
processing and communication patterns in a biological nervous system, such as neural
coding that attempts to define a relationship between various stimuli and associated
neuronal responses in the brain. Deep learning architectures such as deep neural
networks, deep belief networks and recurrent neural networks have been applied to
fields including computer vision, speech recognition, natural language processing,
audio recognition, social network filtering, machine translation, bioinformatics
and drug design,[5] where they have produced results comparable to and in some
cases superior[6] to human experts.