In [21]:
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Input, Embedding, Dense, TimeDistributed, Concatenate, Attention
import tensorflow_addons as tfa
from tensorflow_addons.seq2seq import AttentionWrapper
import keras



## Load Preprocessed Data

In [None]:
data = np.load('../preprocessing/preprocessed.npz')
x_train = data['x_train']
x_test = data['x_test']
y_train = data['y_train']
y_test = data['y_test']
max_text_len = data['max_text_len']
max_summary_len = data['max_summary_len']
x_voc_size = data['x_voc_size']
y_voc_size = data['y_voc_size']

array([378, 189,   8, ...,   0,   0,   0], dtype=int32)

## Create an attention mechanism
https://www.analyticsvidhya.com/blog/2019/11/comprehensive-guide-attention-mechanism-deep-learning/

In [7]:
import keras.backend as K

In [19]:
class attention(keras.layers.Layer):
    def build(self, input_shape):
        self.W = self.add_weight(name = "att_weight", shape = (input_shape[-1],1))
        self.b=self.add_weight(name="att_bias",shape=(input_shape[1],1),initializer="zeros")        
        super(attention, self).build(input_shape)
    def call(self, x):
        """
        inputs: x = [encoder_output_sequence, decoder_output_sequence]
        """
        et = K.squeeze(K.tanh(K.dot(x,self.W)+ self.b), axis = -1)
        at = K.expand_dims(K.softmax(et), axis = -1)
        output = x*at
        return K.sum(output,axis =1)
    def compute_output_shape(self,input_shape):
        return (input_shape[0], input_shape[-1])
    def get_config(self):
        return super(attention,self).get_config()
        

In this notebook we build up the Encoder-Decoder architecture using LSTM model. 

In [24]:
hidden_size = 1000

#https://www.analyticsvidhya.com/blog/2019/06/comprehensive-guide-text-summarization-using-deep-learning-python/
def create_model(max_text_len,x_voc_size, hidden_size):
    #Encoder
    encoder_input = Input(shape=(max_text_len,))
    enc_emb = Embedding(x_voc_size, hidden_size,trainable=True)(encoder_input) 
    
    enc_lstm1 = LSTM(hidden_size, return_sequences = True, return_state = True)
    encoder_out1, state_h1, state_c1 = enc_lstm1(enc_emb)
    
    enc_lstm2 = LSTM(hidden_size, return_sequences = True, return_state = True)
    encoder_out2, state_h2, state_c2 = enc_lstm2(enc_emb)
    
    enc_lstm3 = LSTM(hidden_size, return_sequences = True, return_state = True)
    encoder_out3, state_h3, state_c3 = enc_lstm3(enc_emb)
    
    decoder_input = Input(shape = (None,))
    dec_emb_layer = Embedding(y_voc_size, hidden_size, trainable = True)
    dec_emb = dec_emb_layer(decoder_input) 
    
    dec_lstm = LSTM(hidden_size, return_sequences=True, return_state=True) 
    dec_outputs,decoder_fwd_state, decoder_back_state = dec_lstm(dec_emb,initial_state=[state_h3, state_c3]) 
    

    print(f"dec_outputs: {np.shape(dec_outputs)} encoder_out3: {np.shape(encoder_out3)}")
    attention_out = Attention()([encoder_out3,dec_outputs])
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([dec_outputs, attention_out])
    decoder_dense = TimeDistributed(Dense(y_voc_size, activation='softmax'))  
    decoder_outputs = decoder_dense(decoder_concat_input)   
    model =Model([encoder_input, decoder_input],decoder_outputs)
    model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy')
    return model
#attention = AttentionWrapper()

In [25]:
model = create_model(max_text_len,x_voc_size, hidden_size)
model.summary()

dec_outputs: (None, None, 1000) encoder_out3: (None, 5000, 1000)
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_13 (InputLayer)          [(None, 5000)]       0           []                               
                                                                                                  
 input_14 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 embedding_12 (Embedding)       (None, 5000, 1000)   15472000    ['input_13[0][0]']               
                                                                                                  
 embedding_13 (Embedding)       (None, None, 1000)   15472000    ['input_14[0][0]']               
                           