In [2]:
from keras.layers import *
from keras.models import Model
from keras.activations import relu

import numpy as np
import tensorflow as tf
import keras.backend as K

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Work around Keras' GPU-memory problem: expose a single CUDA device to this
# process and cap the fraction of its memory the TensorFlow session may use.
import os
from keras.backend.tensorflow_backend import set_session

# Only the device with index "1" is made visible to CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Limit this process to 25% of the visible GPU's memory.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.25)
)
set_session(tf.Session(config=config))

In [4]:
# --- Sequence dimensions ---
MAX_LENGTH = 300  # length of a single character sequence (columns of the toy arrays)
input_length = 10
input_dim = 1
output_length = 10  # units of the decoders' `out` Dense layer
output_dim = 4

# --- Data and network sizes ---
samples = 100  # number of rows in each toy array
hidden_dim = 3  # number of recurrent layers the encoder stacks
hidden_size = MAX_LENGTH // 2  # must be half of MAX_LENGTH

In [5]:
# Toy data: three (samples, MAX_LENGTH) arrays of uniform floats in [0, 1).
# They are drawn from a dedicated, seeded generator so that Restart & Run All
# reproduces the same arrays without touching NumPy's global random state.
_data_rng = np.random.RandomState(42)
x = _data_rng.random_sample((samples, MAX_LENGTH))  # fed to the encoder input in model.fit
y = _data_rng.random_sample((samples, MAX_LENGTH))  # fed to the decoder input in model.fit
z = _data_rng.random_sample((samples, MAX_LENGTH))  # used as the training target in model.fit
# NOTE(review): x and y end up in Embedding layers, which look values up as
# integer indices; floats in [0, 1) presumably all select index 0 -- confirm
# whether integer token ids were intended here.

![Encoder network architecture](./encoder-network.png)

In [6]:
# Embedding configuration, in the order the Embedding layers consume it:
# [embedding_length, embedding_dim, input_length]
embedding_size = [
    1000,  # embedding_length: first positional argument of Embedding
    64,    # embedding_dim: second positional argument of Embedding
    300,   # input_length: passed as Embedding(input_length=...)
]
# hidden_size is the number of units per GRU direction (i.e. enc_units)
class EncoderRNN(Model):
    """Encoder: embedding, one bidirectional GRU, then stacked GRU layers.

    The depth is read from the module-level ``hidden_dim``: one bidirectional
    layer plus ``hidden_dim - 1`` applications of ``self.deepgru``. The same
    ``deepgru`` layer object is applied each time, so those applications share
    one set of weights.
    """

    def __init__(self, embedding_size, hidden_size):
        """Create the layers.

        Args:
            embedding_size: ``[embedding_length, embedding_dim, input_length]``,
                forwarded to ``Embedding``.
            hidden_size: units of the GRU wrapped by ``Bidirectional``; the
                deeper GRUs are built with ``hidden_size * 2`` units.
        """
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = Embedding(embedding_size[0], embedding_size[1],input_length=embedding_size[2])
        self.gru = GRU(hidden_size, return_sequences=True, return_state=True)
        self.deepgru = GRU(hidden_size*2, return_sequences=True, return_state=True)
        # Not used by __call__; kept so the attribute remains available.
        self.lastdeepgru = GRU(hidden_size*2, return_sequences=False, return_state=True)
        self.bigru = Bidirectional(self.gru)

    def __call__(self, en_input):
        """Encode ``en_input``.

        Args:
            en_input: tensor passed to the embedding layer.

        Returns:
            ``(output, hidden)`` where ``output`` is the sequence output of the
            last recurrent layer applied and ``hidden`` is a list holding one
            final-state tensor per recurrent layer (the first entry is the
            concatenation of the forward and backward states).
        """
        # Use the argument, not the notebook-global `en_inputs`: the original
        # only worked when a global of that name happened to exist already.
        emb = self.embedding(en_input)
        encoder_out, fwd_h1, bck_h1 = self.bigru(emb)
        state_h = [concatenate([fwd_h1, bck_h1])]
        # range() is empty when hidden_dim <= 1, so no extra guard is needed.
        for _ in range(1, hidden_dim):
            encoder_out, en_hidden = self.deepgru(encoder_out)
            state_h.append(en_hidden)
        return encoder_out, state_h

    def initialize_hidden_state(self):
        """Return a list of two zero tensors, each of shape ``(1, hidden_size)``."""
        return [tf.zeros((1, self.hidden_size)) for _ in range(2)]

![Decoder network architecture](./decoder-network.png)

In [7]:
class DecoderRNN(Model):
    """Plain (attention-free) decoder mirroring the encoder's layer stack.

    Layers: embedding -> ReLU -> bidirectional GRU -> ``deepgru`` for every
    intermediate encoder state -> ``lastdeepgru`` -> softmax.
    """

    def __init__(self, embedding_size, hidden_size):
        """Create the layers.

        Args:
            embedding_size: ``[embedding_length, embedding_dim, input_length]``,
                forwarded to ``Embedding``.
            hidden_size: units of the GRU wrapped by ``Bidirectional``; the
                deeper GRUs are built with ``hidden_size * 2`` units.
        """
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = Embedding(embedding_size[0], embedding_size[1],input_length=embedding_size[2])
        self.gru = GRU(hidden_size, return_sequences=True, return_state=True)
        self.deepgru = GRU(hidden_size*2, return_sequences=True, return_state=True)
        self.lastdeepgru = GRU(hidden_size*2, return_sequences=False, return_state=True)
        self.bigru = Bidirectional(self.gru)

        # Not used by __call__; kept so the attribute remains available.
        self.out = Dense(output_length)
        self.softmax = Activation('softmax')

    def __call__(self, de_input, hiddens):
        """Decode ``de_input`` using the encoder states in ``hiddens``.

        Args:
            de_input: tensor passed to the embedding layer.
            hiddens: list of encoder state tensors, one per encoder layer
                (a single tensor is accepted and treated as a one-item list).
                ``hiddens[1:-1]`` seed the ``deepgru`` applications and
                ``hiddens[-1]`` seeds ``lastdeepgru``.

        Returns:
            ``(output, hidden)``: the softmax-activated output and a list with
            the final state of every recurrent layer that was applied.
        """
        if not isinstance(hiddens, (list, tuple)):
            hiddens = [hiddens]

        # Use the argument, not the notebook-global `en_inputs`.
        emb = self.embedding(de_input)
        emb = Activation('relu')(emb)
        # TODO (known gap): the bidirectional layer is not given an initial
        # hidden state here, so hiddens[0] is unused.
        output, fwd_h, bck_h = self.bigru(emb)
        state_h = [concatenate([fwd_h, bck_h])]
        # The slice is empty for fewer than three states, so no guard is needed.
        for layer_state in hiddens[1:-1]:
            output, de_hidden = self.deepgru(output, initial_state=layer_state)
            state_h.append(de_hidden)
        if len(hiddens) > 1:
            # Seed the last layer from the last encoder state. The original
            # reused the loop variable, which was unbound for exactly two
            # states and otherwise pointed at hiddens[-2].
            output, de_hidden = self.lastdeepgru(output, initial_state=hiddens[-1])
            state_h.append(de_hidden)
        output = self.softmax(output)
        print(type(output))
        return output, state_h

    def initialize_hidden_state(self):
        """Return a zero tensor of shape ``(1, hidden_size)``."""
        return tf.zeros((1, self.hidden_size))

![Attention decoder network architecture](./attention-decoder-network.png)

In [15]:

class AttnDecoderRNN(Model):
    """Decoder that attends over the encoder outputs before its GRU stack.

    The flattened, dropout-regularised embedding is concatenated with the last
    encoder state, projected to ``max_length`` attention weights (softmax),
    and those weights are batch-dotted with ``encoder_outputs``. The attended
    vector is combined with the embedding and passed through the same
    bidirectional/deep GRU stack as the plain decoder.
    """

    def __init__(self, embedding_size ,hidden_size, dropout_p=0.1, max_length=MAX_LENGTH):
        """Create the layers.

        Args:
            embedding_size: ``[embedding_length, embedding_dim, input_length]``,
                forwarded to ``Embedding``.
            hidden_size: units of the GRU wrapped by ``Bidirectional``; the
                deeper GRUs are built with ``hidden_size * 2`` units.
            dropout_p: rate of the dropout applied to the flattened embedding.
            max_length: number of attention weights produced per sample.
        """
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.dropout_p = dropout_p
        self.max_length = max_length
        # Size of one flattened embedded sequence. Taken from the constructor
        # argument so __call__ does not depend on a global `embedding_size`.
        self.embedded_flat_dim = embedding_size[1] * embedding_size[2]

        self.embedding = Embedding(embedding_size[0], embedding_size[1],input_length=embedding_size[2],name ='atten_embed')
        self.attn = Dense(self.max_length,name = 'atten_attn')
        self.attn_combine = Dense(self.hidden_size,name = 'atten_combine')
        self.dropout = Dropout(self.dropout_p,name ='atten_dropout')

        self.gru = GRU(hidden_size, return_sequences=True, return_state=True,name = 'atten_gru')
        self.deepgru = GRU(hidden_size*2, return_sequences=True, return_state=True,name = 'atten_deepgru')
        self.lastdeepgru = GRU(hidden_size*2, return_sequences=False, return_state=True,name='atten_lastdeepgru')
        self.bigru = Bidirectional(self.gru,name='atten_bigru')
        # Wrapper layer so the backend batch_dot can be used in the layer graph.
        self.batch_dot = Lambda(lambda layers:K.batch_dot(layers[0],layers[1]))
        # Not used by __call__; kept so the attribute remains available.
        self.out = Dense(output_length)
        self.softmax =Softmax(axis=-1)

    def __call__(self, de_input, hiddens, encoder_outputs):
        """Decode ``de_input`` while attending over ``encoder_outputs``.

        Args:
            de_input: tensor passed to the embedding layer.
            hiddens: list of encoder state tensors (a single tensor is accepted
                and treated as a one-item list). ``hiddens[-1]`` drives the
                attention weights and seeds ``lastdeepgru``; ``hiddens[1:-1]``
                seed the ``deepgru`` applications.
            encoder_outputs: encoder sequence output that the attention
                weights are applied to.

        Returns:
            ``(output, hidden)``: a softmax output with ``MAX_LENGTH`` entries
            per sample and a list with the final state of every recurrent
            layer that was applied.
        """
        # The original left `hidden` unbound when `hiddens` was not a list.
        if not isinstance(hiddens, (list, tuple)):
            hiddens = [hiddens]

        embedded = self.embedding(de_input)
        print(embedded.shape)
        # Flatten each embedded sequence into one vector per sample
        # (reshape to a length-1 sequence, then squeeze that axis away).
        embedded = Reshape((1, self.embedded_flat_dim))(embedded)
        print(type(embedded))
        embedded = Lambda(lambda x:K.squeeze(x,1))(embedded)
        embedded = self.dropout(embedded)

        hidden = Reshape((1,self.hidden_size*2))(hiddens[-1])
        hidden = Lambda(lambda x:K.squeeze(x,1))(hidden)
        # From here on the two vectors are concatenated.
        concat = Concatenate(1,name='atten_concat2')([embedded, hidden])

        # Use the projection declared in __init__ (same width as the anonymous
        # Dense the original created here) so the layer is owned by the model.
        attn_weights = self.softmax(self.attn(concat))
        attn_weights = Reshape((1,-1))(attn_weights)
        attn_applied = self.batch_dot([attn_weights, encoder_outputs])
        print(attn_applied.shape)
        attn_applied = Lambda(lambda x:K.squeeze(x,1))(attn_applied)

        output = Concatenate(1)([embedded, attn_applied])
        output = self.attn_combine(output)
        output = ReLU()(output)
        # Re-introduce a length-1 time axis for the recurrent layers.
        output = Reshape((1,-1))(output)
        output, fwd_h, bck_h = self.bigru(output)
        state_h = [concatenate([fwd_h, bck_h])]
        # The slice is empty for fewer than three states, so no guard is needed.
        for layer_state in hiddens[1:-1]:
            output, de_hidden = self.deepgru(output, initial_state=layer_state)
            state_h.append(de_hidden)
        if len(hiddens) > 1:
            # Seed the last layer from the last encoder state. The original
            # reused a leftover variable, which pointed at hiddens[-2] whenever
            # the loop above had run.
            output, de_hidden = self.lastdeepgru(output, initial_state=hiddens[-1])
            state_h.append(de_hidden)

        output = Reshape((1,-1))(output)
        output = TimeDistributed(Dense(MAX_LENGTH, activation='softmax'))(output)
        output = Lambda(lambda x:K.squeeze(x,1))(output)
        print(output.shape)
        return output, state_h

    def initHidden(self):
        """Return a zero tensor of shape ``(1, 1, hidden_size)``.

        Built with TensorFlow; the original referenced ``torch`` and
        ``device``, neither of which is available in this notebook.
        """
        return tf.zeros((1, 1, self.hidden_size))

In [16]:
# Instantiate the sub-networks. `decoder` (the attention-free variant) is
# created but not wired into the graph built below.
encoder = EncoderRNN(embedding_size, hidden_size)
decoder = DecoderRNN(embedding_size, hidden_size)
atten = AttnDecoderRNN(embedding_size, hidden_size)

# The input length comes from the embedding configuration instead of a
# hard-coded 300, so the inputs always match the Embedding layers'
# input_length.
en_inputs = Input(shape=(embedding_size[2],), dtype=float)
de_inputs = Input(shape=(embedding_size[2],), dtype=float)

# Encoder produces the sequence output and per-layer states; the attention
# decoder consumes both.
en_output, en_hidden = encoder(en_inputs)
attent_output, attn_hidden = atten(de_inputs, en_hidden, en_output)

(?, 300, 64)
<class 'tensorflow.python.framework.ops.Tensor'>
(?, 1, 300)
(?, 300)


In [21]:
# Wrap the encoder / attention-decoder graph in a trainable model.
model = Model(inputs=[en_inputs, de_inputs], outputs=attent_output)
model.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# Show the shapes of the arrays about to be fed to fit(), one per line.
print(x.shape, y.shape, z.shape, sep="\n")

# x -> encoder input, y -> decoder input, z -> target.
model.fit(
    x=[x, y],
    y=z,
    batch_size=100,
    epochs=20,
)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            (None, 300)          0                                            
__________________________________________________________________________________________________
embedding_9 (Embedding)         (None, 300, 64)      64000       input_8[0][0]                    
__________________________________________________________________________________________________
input_9 (InputLayer)            (None, 300)          0                                            
__________________________________________________________________________________________________
bidirectional_9 (Bidirectional) [(None, 300, 300), ( 193500      embedding_9[0][0]                
__________________________________________________________________________________________________
atten_embe

Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f2fab0fc048>