In [5]:

from keras.layers import *
from keras.models import Model
from keras.activations import relu

import numpy as np
import tensorflow as tf
import keras.backend as K

In [6]:
# 解决keras 显存问题
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.25
set_session(tf.Session(config=config)) 

In [7]:
input_length = 10
input_dim = 1
MAX_LENGTH = 300 # 单个字符的长度
output_length = 10
output_dim = 4

samples = 100
hidden_dim = 3
hidden_size = 6

In [8]:
x = np.random.random((samples, MAX_LENGTH)) ## 100,300
y = np.random.random((samples, MAX_LENGTH))
z = np.random.random((samples, MAX_LENGTH))

![avatar](./encoder-network.png)

In [9]:
#embedding_size = [embedding_length,embedding_dim,input_length]
embedding_size = [1000,64,300]
# hidden_size is the length of the kernel utils  = enc_units
class EncoderRNN(Model):
    def __init__(self, embedding_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = Embedding(embedding_size[0], embedding_size[1],input_length=embedding_size[2])
        self.gru = GRU(hidden_size, return_sequences=True, return_state=True)
        self.deepgru = GRU(hidden_size*2, return_sequences=True, return_state=True)
        self.lastdeepgru = GRU(hidden_size*2, return_sequences=False, return_state=True)
        self.bigru = Bidirectional(self.gru)

    def __call__(self, en_input):
        state_h = []
        emb = self.embedding(en_inputs)
        encoder_out, fwd_h1, bck_h1 = self.bigru(emb)
        state_h.append(concatenate([fwd_h1, bck_h1]))
        if hidden_dim>1:
            for i in range(1,hidden_dim):
                encoder_out, en_hidden = self.deepgru(encoder_out)
                state_h.append(en_hidden)
        output = encoder_out
        hidden = state_h
        return output, hidden
    
    def initialize_hidden_state(self):
        init_state = [tf.zeros((1, self.hidden_size)) for i in range(2)]
        return init_state

![avatar](./decoder-network.png)

In [22]:
class DecoderRNN(Model):
    def __init__(self, embedding_size, hidden_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = Embedding(embedding_size[0], embedding_size[1],input_length=embedding_size[2])
        self.gru = GRU(hidden_size, return_sequences=True, return_state=True)
        self.deepgru = GRU(hidden_size*2, return_sequences=True, return_state=True)
        self.lastdeepgru = GRU(hidden_size*2, return_sequences=False, return_state=True)
        self.bigru = Bidirectional(self.gru)
        
        self.out = Dense(output_length)
        self.softmax = Activation('softmax')

    def __call__(self, de_input, hiddens):
        state_h = []
        emb = self.embedding(en_inputs)
        emb = Activation('relu')(emb)
        # !挖个坑，这里没有初始化hidden_state
        output, fwd_h, bck_h = self.bigru(emb)
        state_h.append(concatenate([fwd_h, bck_h]))
        if len(hiddens)>2:
            for hidden in hiddens[1:-1]:
                output, de_hidden = self.deepgru(output, hidden)
                state_h.append(de_hidden)
        if len(hiddens)>1:
            output, de_hidden = self.lastdeepgru(output, hidden)
            state_h.append(de_hidden)
        hidden = state_h
        output = self.softmax(output)
        print(type(output))
        return output, hidden

    def initialize_hidden_state(self):
        init_state = tf.zeros((1, self.hidden_size))
        return init_state

![avatar](./attention-decoder-network.png)

In [44]:

class AttnDecoderRNN(Model):
    def __init__(self, embedding_size ,hidden_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = Embedding(embedding_size[0], embedding_size[1],input_length=embedding_size[2],name ='atten_embed')
        self.attn = Dense(self.max_length,name = 'atten_attn')
        self.attn_combine = Dense(self.hidden_size,name = 'atten_combine')
        self.dropout = Dropout(self.dropout_p,name ='atten_dropout')
        
        self.gru = GRU(hidden_size, return_sequences=True, return_state=True,name = 'atten_gru')
        self.deepgru = GRU(hidden_size*2, return_sequences=True, return_state=True,name = 'atten_deepgru')
        self.lastdeepgru = GRU(hidden_size*2, return_sequences=False, return_state=True,name='atten_lastdeepgru')
        self.bigru = Bidirectional(self.gru,name='atten_bigru')
        self.batch_dot = Lambda(lambda x,y:K.batch_dot(x,y))
        self.out = Dense(output_length)
        self.softmax =Softmax(axis=-1)
        #包装层
        
        
        

    def __call__(self, de_input, hiddens, encoder_outputs):
        state_h = []
        embedded = self.embedding(de_input)
        print(embedded.shape)
        # TODO ; use lambda https://www.cnblogs.com/jqpy1994/p/11433746.html or  https://keras.io/zh/layers/core/
        #embedded = K.reshape(embedded,[samples,embedding_size[1]*embedding_size[2]])
        embedded = Reshape((1,embedding_size[1]*embedding_size[2]))(embedded)
        print(type(embedded))
        embedded = Lambda(lambda x:K.squeeze(x,1))(embedded)
    
        embedded = self.dropout(embedded) # Dim:(Batch Size , Decoder Hidden Size + Embedding Size)
        if type(hiddens) == type(list()):
            hidden = hiddens[-1]
        
        # hidden = K.reshape(hidden,[samples,self.hidden_size*2]) 
        hidden = Reshape((1,self.hidden_size*2))(hidden)
        hidden = Lambda(lambda x:K.squeeze(x,1))(hidden)
        concat = Concatenate(1,name='atten_concat2')([embedded, hidden])
        # note: 从这里开始，把两个向量拼接起来    
        attn_weights =self.softmax(
            Dense(self.max_length)(concat))
        
        attn_applied = K.batch_dot(K.expand_dims(attn_weights,1),
                                 encoder_outputs)
        print(attn_applied.shape)
        attn_applied =K.squeeze(attn_applied,1)

        
        output = Concatenate(1)([embedded, attn_applied])

        output = self.attn_combine(output)
        
        output = ReLU()(output)
        output = K.expand_dims(output,1)
        print(type(output))
        output, fwd_h, bck_h = self.bigru(output)
        state_h.append(concatenate([fwd_h, bck_h]))
        if len(hiddens)>2:
            for hidden in hiddens[1:-1]:
                output, de_hidden = self.deepgru(output, hidden)
                state_h.append(de_hidden)
        if len(hiddens)>1:
            output, de_hidden = self.lastdeepgru(output, hidden)
            state_h.append(de_hidden)
        hidden = state_h
        output = self.softmax(output)
        return output, hidden, attn_weights
    
    
    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

SyntaxError: invalid syntax (<ipython-input-44-ea0bce1d4b29>, line 18)

In [43]:
encoder = EncoderRNN(embedding_size,hidden_size)
decoder = DecoderRNN(embedding_size,hidden_size)
atten = AttnDecoderRNN(embedding_size,hidden_size)# hidden_size = 6

en_inputs = Input(shape=(300,),dtype=float)
de_inputs = Input(shape=(300,),dtype=float)
en_output, en_hidden = encoder(en_inputs)
de_output, de_hidden = decoder(de_inputs,en_hidden)
attent_output = atten(de_inputs,de_hidden,en_output)

<class 'tensorflow.python.framework.ops.Tensor'>
(?, 300, 64)
<class 'tensorflow.python.framework.ops.Tensor'>


TypeError: <lambda>() missing 1 required positional argument: 'y'

In [27]:
model = Model([en_inputs,de_inputs], attent_output)

model.compile(loss='categorical_crossentropy', optimizer='sgd')
# m = model.predict(z)
# print(m.shape)
model.fit([x,y],z,
          epochs=20,
          batch_size=100)
# model.fit(x, y, nb_epoch=1)

ValueError: Output tensors to a Model must be the output of a Keras `Layer` (thus holding past layer metadata). Found: Tensor("Squeeze_14:0", shape=(?, 12), dtype=float32)