In [1]:
import keras
from keras.preprocessing.text import one_hot,Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense , Flatten ,Embedding,Input
from keras.models import Model

from keras import backend as K
from keras.layers import LSTM, Bidirectional, concatenate, Add, Lambda

import numpy as np

import re

Using TensorFlow backend.


In [2]:
# Read the pre-trained GloVe file into a {token: vector} dictionary.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
EMBEDDING_FILE=r'C:\Users\TanZhenR\Desktop\Projects\Standard\Text Embeddings\Glove/glove.6B.100d.txt'


def get_coefs(word, *arr):
    """Turn the whitespace-split fields of one embedding line into a pair.

    Args:
        word: first field of the line (the token).
        *arr: remaining fields, converted to a float32 vector.

    Returns:
        Tuple ``(word, numpy.ndarray of dtype float32)``.
    """
    return word, np.asarray(arr, dtype='float32')


# Use a context manager so the file handle is closed deterministically, and
# skip blank lines (they would otherwise call get_coefs() with no arguments).
with open(EMBEDDING_FILE, encoding="utf8") as glove_file:
    embeddings_index = dict(
        get_coefs(*line.split())
        for line in glove_file
        if line.strip()
    )

# np.stack requires a real sequence; a dict view is rejected by recent NumPy.
all_embs = np.stack(list(embeddings_index.values()))

# Global statistics of the pre-trained vectors (over every coefficient).
emb_mean, emb_std = all_embs.mean(), all_embs.std()

In [3]:
EMBED_SIZE = 100   # width of each embedding row
LATENT_DIM = 50    # LSTM units per direction in the encoder
VOC_SIZE = len(embeddings_index)

# Embedding
# Row i of embedding_matrix belongs to the i-th key of embeddings_index.
word_index = embeddings_index.keys()

# Every row starts as noise drawn with the mean/std of the pre-trained vectors.
embedding_matrix = np.random.normal(emb_mean, emb_std, (VOC_SIZE, EMBED_SIZE))

# Compiled once instead of on every iteration. The hyphen is escaped: written
# as ")-+" inside a character class it is a *range* (")", "*", "+") and a
# literal "-" would never match.
punct_pattern = re.compile(r'[,.?!@#$%^&*()\-+_={}|:;]')

for i, word in enumerate(word_index):
    # Tokens containing any listed punctuation keep their random row; all
    # other tokens receive their pre-trained vector. The lookup cannot miss
    # because `word` is iterated from embeddings_index itself.
    if punct_pattern.search(word) is None:
        embedding_matrix[i] = embeddings_index[word]

# Design Model

In [4]:
# Reset the Keras backend session before any layer of the model is created.
K.clear_session()

# Encoder embedding: variable-length id sequences looked up in a frozen
# embedding table initialised from embedding_matrix.
enc_input = Input(name='EncoderInputLayer', shape=(None,))
enc_emb_layer = Embedding(
    input_dim=VOC_SIZE,
    output_dim=EMBED_SIZE,
    weights=[embedding_matrix],
    trainable=False,
    name='EncoderEmbedLayer',
)
enc_emb_output = enc_emb_layer(enc_input)

# BiLSTM encoder: returns the per-step outputs plus the final hidden/cell
# state of the forward and of the backward pass.
enc_lstm_layer = Bidirectional(
    LSTM(units=LATENT_DIM, dropout=0.3, return_state=True, return_sequences=True),
    name='EncBiLSTMLayer',
)
enc_output, fw_h, fw_c, bw_h, bw_c = enc_lstm_layer(enc_emb_output)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [5]:
# Join the forward and backward final states of the encoder, one tensor for
# the hidden state and one for the cell state.
enc_concat_h = concatenate([fw_h, bw_h], name='EncConcatH')
enc_concat_c = concatenate([fw_c, bw_c], name='EncConcatC')

# Project each joined state with its own dense layer (no activation given).
enc_state_h = Dense(units=2 * LATENT_DIM, name='EncDenseH')(enc_concat_h)
enc_state_c = Dense(units=2 * LATENT_DIM, name='EncDenseC')(enc_concat_c)

# [hidden, cell] pair handed to the decoder LSTM as its initial state.
dec_ini_state = [enc_state_h, enc_state_c]

In [6]:
# Decoder: its own input and its own frozen embedding table, initialised from
# the same embedding_matrix as the encoder's.
dec_input = Input(name='DecoderInput', shape=(None,))
dec_emb_layer = Embedding(
    input_dim=VOC_SIZE,
    output_dim=EMBED_SIZE,
    weights=[embedding_matrix],
    trainable=False,
    name='DecoderEmbedLayer',
)
dec_emb_output = dec_emb_layer(dec_input)

# Decoder LSTM, started from the projected encoder states; yields the full
# output sequence together with its final hidden and cell state.
dec_lstm_layer = LSTM(
    units=2 * LATENT_DIM,
    dropout=0.3,
    return_state=True,
    return_sequences=True,
    name='DecLSTMLayer',
)
dec_output, state_h, state_c = dec_lstm_layer(dec_emb_output, initial_state=dec_ini_state)
dec_state = concatenate([state_h, state_c], name='DecConcatHC')


def _insert_axis_1(tensor):
    """Insert a length-1 axis at position 1 of `tensor`."""
    return K.expand_dims(tensor, axis=1)


# Decoder feature: the final decoder state with two singleton axes inserted
# after the batch axis (one per Lambda layer).
dec_feat = Lambda(_insert_axis_1, name='DecFeat1')(dec_state)
dec_feat = Lambda(_insert_axis_1, name='DecFeat2')(dec_feat)

# Decoder distribution: softmax over the vocabulary at every decoder step.
dec_dist = Dense(units=VOC_SIZE, activation='softmax', name='DecDistribution')(dec_output)

In [7]:
# Attention Mechanism

# Reshape enc_output: (batch, T, 2*LATENT_DIM) -> (batch, T, 1, 2*LATENT_DIM),
# then project to the width of dec_feat so the two can be added. dec_feat is
# the concatenated (h, c) state of an LSTM with 2*LATENT_DIM units, i.e.
# 4*LATENT_DIM wide. This equals the previous EMBED_SIZE*2 for the current
# constants, but stays correct if either constant changes.
enc_feat = Lambda(lambda x: K.expand_dims(x, axis=2), name='EncFeat1')(enc_output)
enc_feat = Dense(LATENT_DIM*4, name='EncFeat2')(enc_feat)

# Attention scores: broadcast-add the decoder feature (batch, 1, 1, D) onto
# every encoder step, project, then collapse the last two axes -> (batch, T).
# NOTE(review): the scores are not normalised over the encoder steps here
# (no softmax over T) -- confirm this is intended.
attn_dist = Lambda(lambda x: x[0] + x[1], name='AttnDist1')([enc_feat, dec_feat])
attn_dist = Dense(EMBED_SIZE*2, name='AttnDist2')(attn_dist)
attn_dist = Lambda(lambda x: K.sum(x, axis=[2,3]), name='AttnDist3')(attn_dist)

# Encoding distribution: weight each encoder embedding by its score.
# attn_dist is (batch, T) while enc_emb_output is (batch, T, EMBED_SIZE); a
# trailing axis is required so broadcasting pairs batch with batch and T
# with T instead of right-aligning (batch, T) against (T, EMBED_SIZE).
enc_dist = Lambda(
    lambda x: K.expand_dims(x[0], axis=-1) * x[1],
    name='EncDist1',
)([attn_dist, enc_emb_output])
enc_dist = Dense(VOC_SIZE, activation='softmax', name='EncDist2')(enc_dist)

In [8]:
# pgen: scalar gate per sample, computed from the decoder's initial state
# and its final state.
pgen = concatenate([concatenate(dec_ini_state), dec_state], name='PGen1')
# Sigmoid keeps the gate in [0, 1]; with a linear activation the mix below is
# not a convex combination and the result is no longer a distribution.
pgen = Dense(1, activation='sigmoid', name='PGen2')(pgen)


def _mix_distributions(tensors):
    """Blend the encoder and decoder distributions with the pgen gate.

    Args:
        tensors: list ``[enc_dist, dec_dist, pgen]`` where the distributions
            are (batch, steps, VOC_SIZE) and pgen is (batch, 1).

    Returns:
        ``(1 - pgen) * enc_dist + pgen * dec_dist`` with pgen broadcast over
        the step and vocabulary axes.
    """
    enc_probs, dec_probs, gate = tensors
    # (batch, 1) -> (batch, 1, 1): without the extra axis, broadcasting would
    # right-align the batch axis of the gate with the step axis.
    gate = K.expand_dims(gate, axis=1)
    return (1 - gate) * enc_probs + gate * dec_probs


# Overall distribution.
# NOTE(review): the sum requires enc_dist and dec_dist to have the same number
# of steps (or one of them a single step) -- confirm against the training data.
overall_dist = Lambda(_mix_distributions, name='Output')([enc_dist, dec_dist, pgen])

In [9]:
# Assemble the two-input model (encoder ids, decoder ids) whose single output
# is the blended distribution, then print its layer table.
model = Model(
    inputs=[enc_input, dec_input],
    outputs=overall_dist,
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
EncoderInputLayer (InputLayer)  (None, None)         0                                            
__________________________________________________________________________________________________
EncoderEmbedLayer (Embedding)   (None, None, 100)    40000000    EncoderInputLayer[0][0]          
__________________________________________________________________________________________________
EncBiLSTMLayer (Bidirectional)  [(None, None, 100),  60400       EncoderEmbedLayer[0][0]          
__________________________________________________________________________________________________
DecoderInput (InputLayer)       (None, None)         0                                            
__________________________________________________________________________________________________
EncConcatH