In [1]:
from keras.layers import Bidirectional, LSTM, Input, Embedding, Lambda, Flatten, Dense, Softmax, TimeDistributed
import keras.backend as K
from keras.activations import sigmoid, relu
from layers import *
import keras
from keras.models import Model
from params import Params

Using TensorFlow backend.


In [2]:
passages_embedding = Input(shape=(Params.max_passage_count, Params.max_passage_len, Params.embedding_dim), 
                                dtype="float32", name="passages_embedding")
question_embedding = Input(shape=(Params.max_question_len, Params.embedding_dim),
                        dtype="float32", name="question_embedding")

encode_layer = Bidirectional(LSTM(Params.embedding_dim, #recurrent_keep_prob=1-Params.encoder_dropout, 
                            return_sequences=True), name="input_encoder")

In [3]:
question_encoding = encode_layer(question_embedding);
print(question_encoding)

Instructions for updating:
Colocations handled automatically by placer.
Tensor("input_encoder/concat:0", shape=(?, ?, 128), dtype=float32)


In [4]:
passage_encoding = TimeDistributed(encode_layer, name="passage_encoding")(passages_embedding)

In [48]:
passage_encoding

<tf.Tensor 'passage_encoding_3/Reshape_1:0' shape=(?, 5, 200, 128) dtype=float32>

In [5]:
def concat(inputs):
    return K.concatenate(inputs)
Concat = Lambda(concat)

In [16]:
class ContextEncoding(Layer):
    def __init__(self, question_encoding, **kwargs):
        self.question_encoding = question_encoding
        super(ContextEncoding, self).__init__(**kwargs)

    def build(self, input_shape):
        self.c2qAttention = C2QAttention(name="context_to_query_attention")
        self.c2qAttention.build(input_shape)
        self.q2cAttention =  Q2CAttention(name='query_to_context_attention')
        self.q2cAttention.build(input_shape)
        self.mergedContext = MergedContext(name='merged_context')
        self.mergedContext.build(input_shape)

        self.trainable_weights = self.c2qAttention.trainable_weights + self.q2cAttention.trainable_weights + self.mergedContext.trainable_weights
        super(ContextEncoding, self).build(input_shape)


    def call(self, passage_encoding):
        question_encoding = self.question_encoding
        print(passage_encoding)
        # question_encoding, passage_encoding = input
        score_matrix = K.squeeze(K.dot(passage_encoding, K.permute_dimensions(self.question_encoding, (0, 2, 1))), axis=-1)
        print(score_matrix)
        context_to_query_attention = self.c2qAttention([
                                                    score_matrix, question_encoding])
        print(context_to_query_attention)
        query_to_context_attention = self.q2cAttention([score_matrix, passage_encoding])

        merged_context = self.mergedContext(
                                [passage_encoding, context_to_query_attention, query_to_context_attention])
        
        print(passage_encoding)
        print(context_to_query_attention)
        print(query_to_context_attention)
        
        # modeled_passage = Bidirectional(LSTM(50, recurrent_dropout=0.2, return_sequences=True), name="passage_context_encoding")(modeled_passage)
        return merged_context
    
    def compute_output_shape(self, input_shape):
        print(input_shape)
        return (None, input_shape[1], 512)

    def get_config(self):
        config = super().get_config()
        return config

In [17]:
passage_context = TimeDistributed(ContextEncoding(question_encoding))(passage_encoding)

Tensor("time_distributed_4/Reshape:0", shape=(?, 200, 128), dtype=float32)
Tensor("time_distributed_4/Squeeze:0", shape=(?, 200, ?), dtype=float32)
Tensor("time_distributed_4/context_to_query_attention/Sum:0", shape=(?, 200, 128), dtype=float32)
Tensor("time_distributed_4/Reshape:0", shape=(?, 200, 128), dtype=float32)
Tensor("time_distributed_4/context_to_query_attention/Sum:0", shape=(?, 200, 128), dtype=float32)
Tensor("time_distributed_4/query_to_context_attention/Tile:0", shape=(?, ?, 128), dtype=float32)
(None, 200, 128)
(None, 200, 128)


NameError: name 'score_matrix' is not defined

In [51]:
passage_context

<tf.Tensor 'time_distributed_7/Reshape_4:0' shape=(?, 5, 200, 512) dtype=float32>

In [52]:
model_passage_layer = Bidirectional(LSTM(Params.embedding_dim, recurrent_dropout=0.2, return_sequences=True), name="passage_modeling2")

In [53]:
passage_modeling = TimeDistributed(model_passage_layer, name="passage_modeling")(passage_context)

In [54]:
passage_modeling

<tf.Tensor 'passage_modeling_1/Reshape_1:0' shape=(?, 5, 200, 128) dtype=float32>

In [37]:
K.concatenate([passage_context, passage_modeling])

<tf.Tensor 'concat:0' shape=(?, 5, 200, 640) dtype=float32>

In [97]:
span_begin_probabilities = TimeDistributed(SpanBegin(name='span_begin'))(Concat([passage_context, passage_modeling]))

Tensor("time_distributed_17/Reshape:0", shape=(?, 200, 640), dtype=float32)


In [None]:
span_begin_probabilities

In [56]:
span_end_representation = SpanEndRepresentation([passage_context, passage_modeling, span_begin_probabilities])

In [None]:
span_end_representation

In [57]:
span_end_representation = TimeDistributed(Bidirectional(LSTM(Params.embedding_dim, return_sequences=True)), name="span_end_lstm")(span_end_representation)

In [104]:
span_end_representation

<tf.Tensor 'span_end_lstm/Reshape_1:0' shape=(?, 5, 200, 128) dtype=float32>

In [None]:
K.concatenate([passage_context, span_end_representation])

In [103]:
span_end_probabilities = TimeDistributed(SpanEnd(name="span_end_probability"))(Concat([passage_context, span_end_representation]))

(None, 200)
(None, 200)


In [59]:
span_end_probabilities

NameError: name 'span_end_probabilities' is not defined

In [62]:
content_indices = TimeDistributed(ContentIndice(name="content_indice"))(passage_modeling);
print(content_indices)

Tensor("time_distributed_13/Reshape_1:0", shape=(?, 5, 200), dtype=float32)


In [63]:
answer_encoding = AnswerEncoding([passages_embedding, content_indices]);
print(answer_encoding)

Tensor("answer_encoding/Sum:0", shape=(?, 5, 200), dtype=float32)


In [None]:
K.permute_dimensions(answer_encoding, (0, 2, 1))

In [None]:
matrix = K.dot(answer_encoding, K.permute_dimensions(answer_encoding, (0, 2, 1)))

In [None]:
K.squeeze(matrix, axis=-2)

In [None]:
tf.matmul(answer_encoding, K.permute_dimensions(answer_encoding, (0, 2, 1)))

In [64]:
score_matrix = K.squeeze(K.dot(answer_encoding, K.permute_dimensions(answer_encoding, (0, 2, 1))), axis=-2);
print(score_matrix)

Tensor("Squeeze:0", shape=(?, 5, 5), dtype=float32)


In [65]:
eye1 = K.eye(5)
zero1 = K.zeros_like(eye1)
mask = K.cast(K.equal(eye1, zero1), dtype="float32")

In [None]:
mask

In [66]:
score_matrix = score_matrix * mask

In [None]:
score_matrix

In [67]:
score_matrix = Softmax(axis=-1)(score_matrix);
print(score_matrix)

Tensor("softmax_15/truediv:0", shape=(?, 5, 5), dtype=float32)


In [68]:
answer_encoding_hat = K.squeeze(K.dot(score_matrix, answer_encoding), axis=-2);
print(answer_encoding_hat)

Tensor("Squeeze_1:0", shape=(?, 5, 200), dtype=float32)


In [69]:
answer_encoding_final = K.concatenate([answer_encoding, answer_encoding_hat, answer_encoding*answer_encoding_hat]);
print(answer_encoding_final)

Tensor("concat_5:0", shape=(?, 5, 600), dtype=float32)


In [70]:
answer_probability = Dense(1)(answer_encoding_final);
answer_probability = Softmax(axis=-1)(answer_probability)
print(answer_probability)

Tensor("softmax_16/truediv:0", shape=(?, 5, 1), dtype=float32)


In [71]:
answer_probability = K.squeeze(answer_probability, axis=-1);
print(answer_probability)

Tensor("Squeeze_2:0", shape=(?, 5), dtype=float32)


In [74]:
class AnswerProbability(Layer):

    def __init__(self, **kwargs):
        super(AnswerProbability, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape: (None, 5, 200)
        self.dense_1 = Dense(1, activation="relu")
        self.dense_1.build(input_shape[:-1] + (3*input_shape[-1],))
        self.trainable_weights = self.dense_1.trainable_weights
        
        super(AnswerProbability, self).build(input_shape)
    
    def call(self, answer_encoding):
        score_matrix = K.squeeze(K.dot(answer_encoding, K.permute_dimensions(answer_encoding, (0, 2, 1))), axis=-2)
        eye1 = K.eye(Params.max_passage_count); zero1 = K.zeros_like(eye1); mask = K.cast(K.equal(eye1, zero1), dtype="float32")
        score_matrix = score_matrix * mask
        score_matrix = Softmax(axis=-1)(score_matrix)
        answer_encoding_hat = K.squeeze(K.dot(score_matrix, answer_encoding), axis=-2)
        answer_encoding_final = K.concatenate([answer_encoding, answer_encoding_hat, answer_encoding*answer_encoding_hat])
        answer_probability = Dense(1)(answer_encoding_final);
        answer_probability = Softmax(axis=-1)(answer_probability)
        answer_probability = K.squeeze(answer_probability, axis=-1)
        return answer_probability

In [77]:
answer_probability = AnswerProbability()(answer_encoding);
print(answer_probability)

Tensor("answer_probability_2/Squeeze_2:0", shape=(?, 5), dtype=float32)


In [None]:
Model([passages_embedding, question_embedding], [answer_probability, span_begin_probabilities, span_end_probabilities, content_indices])

In [87]:
span_begin_probabilities = TimeDistributed(SpanBegin(name='span_begin'))(K.concatenate([passage_context, passage_modeling]))

Tensor("time_distributed_14/Reshape:0", shape=(?, 200, 640), dtype=float32)


In [99]:
model = Model([passages_embedding, question_embedding], [span_begin_probabilities])

In [101]:
model = Model([passages_embedding, question_embedding], [answer_probability])

In [100]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
passages_embedding (InputLayer) (None, 5, 200, 64)   0                                            
__________________________________________________________________________________________________
passage_encoding (TimeDistribut (None, 5, 200, 128)  66048       passages_embedding[0][0]         
__________________________________________________________________________________________________
time_distributed_7 (TimeDistrib (None, 5, 200, 512)  0           passage_encoding[0][0]           
__________________________________________________________________________________________________
passage_modeling (TimeDistribut (None, 5, 200, 128)  295424      time_distributed_7[0][0]         
__________________________________________________________________________________________________
lambda_1 (

In [102]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
passages_embedding (InputLayer) (None, 5, 200, 64)   0                                            
__________________________________________________________________________________________________
passage_encoding (TimeDistribut (None, 5, 200, 128)  66048       passages_embedding[0][0]         
__________________________________________________________________________________________________
time_distributed_7 (TimeDistrib (None, 5, 200, 512)  0           passage_encoding[0][0]           
__________________________________________________________________________________________________
passage_modeling (TimeDistribut (None, 5, 200, 128)  295424      time_distributed_7[0][0]         
__________________________________________________________________________________________________
time_distr

In [None]:
K.log(answer_probability)

In [None]:
score_matrix = tf.Variable([[1, 2], [2, 3]])
mask = tf.Variable([[0, 1], [1, 0]])

In [None]:
K.eval(score_matrix * mask)

In [None]:
K.dot(score_matrix, mask)

In [None]:
tf.cast(tf.logical_not(tf.cast(tf.matrix_diag([1] * 5), tf.bool)), tf.float32)

In [None]:
eye1 = K.eye(3)
zeor1 = K.zeros_like(eye)
mask = K.cast(K.equal(eye1, zero1), dtype="float32")

In [None]:
?K.zeros_like

In [None]:
?K.eye

In [None]:
K.eval(cast)

In [None]:
content_indices

In [None]:
passage_modeling

In [None]:
p_m = passage_modeling[:, 0, :, :]

In [None]:
p_m

In [10]:
p_1 = Dense(Params.embedding_dim, activation="relu")(p_m);
print(p_1)

NameError: name 'p_m' is not defined

In [None]:
p_2 = Dense(1, activation="relu")(p_1);
print(p_2)

In [None]:
Softmax(axis=-1)(K.squeeze(p_2, axis=-1))

In [None]:
passages_embedding

In [None]:
content_indices

In [11]:
K.expand_dims(content_indices, axis=-1) 

NameError: name 'content_indices' is not defined

In [None]:
K.expand_dims(content_indices, axis=-1) * passages_embedding

In [12]:
K.sum(K.expand_dims(content_indices, axis=-1) * passages_embedding, axis=-1)

NameError: name 'content_indices' is not defined

In [13]:
class AnswerProbability(Layer):
    
    def __init__(self, **kwargs):
        super(AnswerProbability, self).__init__(**kwargs)
        
    def build(self, input_shape):
        pass
    
    """
        input: (None, Params.max_passage_count, Pararms.max_passage_len)
        output: (None, Params.max_passage_count)
    """
    def call(self, answer_encoding):
        
        

SyntaxError: unexpected EOF while parsing (<ipython-input-13-701968f285c2>, line 15)

In [14]:
def answerEncoding(inputs):
    passage_embedding, indice_probability = inputs
    answer_encoding = K.expand_dims(indice_probability, axis=-1) * passage_embedding
    answer_encoding = K.sum(answer_encoding, axis=-1)
    return answer_encoding
AnswerEncoding = Lambda(answerEncoding, name="answer_encoding")

In [60]:
class SpanEnd(Layer):

    def __init__(self, **kwargs):
        super(SpanEnd, self).__init__(**kwargs)

    def build(self, input_shape):
        input_shape_dense_1 = input_shape[:-1] + (Params.embedding_dim*10, )
        self.dense_1 = Dense(units=1)
        self.dense_1.build(input_shape_dense_1)
        self.trainable_weights = self.dense_1.trainable_weights
        super(SpanEnd, self).build(input_shape)

    def call(self, span_end_input):
        span_end_weights = TimeDistributed(self.dense_1)(span_end_input)

        span_end_probabilities = Softmax()(K.squeeze(span_end_weights, axis=-1))
        return span_end_probabilities

    def compute_output_shape(self, input_shape):
        print(input_shape[:-1])
        return input_shape[:-1]

    def get_config(self):
        config = super().get_config()
        return config


In [16]:
class SpanBegin(Layer):

    def __init__(self, **kwargs):
        super(SpanBegin, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape: (None, 200, embeddim*8+embeddim*2)
        self.dense_1 = Dense(units=1)
        self.dense_1.build(input_shape)
        self.trainable_weights = self.dense_1.trainable_weights
        super(SpanBegin, self).build(input_shape)

    def call(self, span_begin_input):
        print(span_begin_input)
        # span_begin_input = K.concatenate([merged_context, modeled_passage])
        span_begin_weights = TimeDistributed(self.dense_1)(span_begin_input)
        span_begin_probabilities = Softmax()(K.squeeze(span_begin_weights, axis=-1))
        return span_begin_probabilities

    def compute_output_shape(self, input_shape):
        return input_shape[:-1]

    def get_config(self):
        config = super().get_config()
        return config

In [17]:
def span_end_representation(inputs):
    passage_context, passage_modeling, span_probabilities = inputs
    
    weighted_sum = K.sum(K.expand_dims(span_begin_probabilities, axis=-1) * passage_modeling, axis=-2)
    passage_weighted_by_predicted_span = K.expand_dims(weighted_sum, axis=-2)
    passage_weighted_by_predicted_span = K.tile(passage_weighted_by_predicted_span, [1, 1, Params.max_passage_len, 1])
    multiply = passage_modeling * passage_weighted_by_predicted_span
    
    return K.concatenate([passage_context, passage_modeling, passage_weighted_by_predicted_span, multiply])

SpanEndRepresentation = Lambda(span_end_representation, name="span_end_representation")

In [18]:
class ContentIndice(Layer):
    
    def __init__(self, **kwargs):
        super(ContentIndice, self).__init__(**kwargs)
        
    def build(self, input_shape):
        self.dense_1 = Dense(Params.embedding_dim, activation="relu")
        self.dense_1.build(input_shape)
        self.dense_2 = Dense(1, activation="relu")
        self.dense_2.build(input_shape[:-1] + (Params.embedding_dim, ))
        self.trainable_weights = self.dense_1.trainable_weights + self.dense_2.trainable_weights
        
        super(ContentIndice, self).build(input_shape)
        
    def call(self, passage_modeling):
        passage_representation = self.dense_1(passage_modeling)
        passage_representation = self.dense_2(passage_representation)
        passage_representation = K.squeeze(passage_representation, axis=-1)
        passage_indices = Softmax(axis=-1)(passage_representation)
        return passage_indices
    
    def compute_output_shape(self, input_shape):
        return input_shape[:-1]
    
    def get_config(self):
        config = super().get_config()
        return config

In [19]:
weighted_sum = K.sum(K.expand_dims(span_begin_probabilities, axis=-1) * passage_modeling, axis=-2)

NameError: name 'span_begin_probabilities' is not defined

In [20]:
weighted_sum

NameError: name 'weighted_sum' is not defined

In [21]:
passage_weighted_by_predicted_span = K.expand_dims(weighted_sum, axis=-2)

NameError: name 'weighted_sum' is not defined

In [None]:
passage_weighted_by_predicted_span

In [22]:
passage_weighted_by_predicted_span = K.tile(passage_weighted_by_predicted_span, [1, 1, Params.max_passage_len, 1])

NameError: name 'passage_weighted_by_predicted_span' is not defined

In [23]:
passage_weighted_by_predicted_span

NameError: name 'passage_weighted_by_predicted_span' is not defined

In [24]:
multiply = passage_modeling * passage_weighted_by_predicted_span

NameError: name 'passage_modeling' is not defined

In [25]:
multiply

NameError: name 'multiply' is not defined

In [30]:
class ContextEncoding(Layer):
    def __init__(self, question_encoding, **kwargs):
        self.question_encoding = question_encoding
        super(ContextEncoding, self).__init__(**kwargs)

    def build(self, input_shape):
        self.c2qAttention = C2QAttention(name="context_to_query_attention")
        self.c2qAttention.build(input_shape)
        self.q2cAttention =  Q2CAttention(name='query_to_context_attention')
        self.q2cAttention.build(input_shape)
        self.mergedContext = MergedContext(name='merged_context')
        self.mergedContext.build(input_shape)

        self.trainable_weights = self.c2qAttention.trainable_weights + self.q2cAttention.trainable_weights + self.mergedContext.trainable_weights
        super(ContextEncoding, self).build(input_shape)


    def call(self, passage_encoding):
        question_encoding = self.question_encoding
        # question_encoding, passage_encoding = input
        score_matrix = K.squeeze(K.dot(passage_encoding, K.permute_dimensions(self.question_encoding, (0, 2, 1))), axis=-1)

        context_to_query_attention = self.c2qAttention([
                                                    score_matrix, question_encoding])
        query_to_context_attention = self.q2cAttention([score_matrix, passage_encoding])

        merged_context = self.mergedContext(
                                [passage_encoding, context_to_query_attention, query_to_context_attention])

        # modeled_passage = Bidirectional(LSTM(50, recurrent_dropout=0.2, return_sequences=True), name="passage_context_encoding")(modeled_passage)
        return merged_context
    
    def compute_output_shape(self, input_shape):
        print(input_shape)
        return (None, input_shape[1], 512)

    def get_config(self):
        config = super().get_config()
        return config

In [26]:
class SpanEnd(Layer):

    def __init__(self, **kwargs):
        super(SpanEnd, self).__init__(**kwargs)

    def build(self, input_shape):
        embdim = Params.embdim
        input_shape_bilstm_1 = input_shape[0][:-1] + (embdim*14, )
        self.bilstm_1 = Bidirectional(LSTM(emdim, return_sequences=True))
        self.bilstm_1.build(input_shape_bilstm_1)
        input_shape_dense_1 = input_shape[0][:-1] + (emdim*10, )
        self.dense_1 = Dense(units=1)
        self.dense_1.build(input_shape_dense_1)
        self.trainable_weights = self.bilstm_1.trainable_weights + self.dense_1.trainable_weights
        super(SpanEnd, self).build(input_shape)

    def call(self, inputs):
        encoded_passage, merged_context, modeled_passage, span_begin_probabilities = inputs
        weighted_sum = K.sum(K.expand_dims(span_begin_probabilities, axis=-1) * modeled_passage, -2)
        passage_weighted_by_predicted_span = K.expand_dims(weighted_sum, axis=1)
        tile_shape = K.concatenate([[1], [K.shape(encoded_passage)[1]], [1]], axis=0)
        passage_weighted_by_predicted_span = K.tile(passage_weighted_by_predicted_span, tile_shape)
        multiply1 = modeled_passage * passage_weighted_by_predicted_span
        span_end_representation = K.concatenate(
            [merged_context, modeled_passage, passage_weighted_by_predicted_span, multiply1])
        
        span_end_representation = self.bilstm_1(span_end_representation)

        span_end_input = K.concatenate([merged_context, span_end_representation])

        span_end_weights = TimeDistributed(self.dense_1)(span_end_input)

        span_end_probabilities = Softmax()(K.squeeze(span_end_weights, axis=-1))
        return span_end_probabilities

    def compute_output_shape(self, input_shape):
        _, merged_context_shape, _, _ = input_shape
        return merged_context_shape[:-1]

    def get_config(self):
        config = super().get_config()
        return config


In [29]:



class Q2CAttention(Layer):

    def __init__(self, **kwargs):
        super(Q2CAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        super(Q2CAttention, self).build(input_shape)

    def call(self, inputs):
        similarity_matrix, encoded_context = inputs
        max_similarity = K.max(similarity_matrix, axis=-1)
        # by default, axis = -1 in Softmax
        context_to_query_attention = Softmax()(max_similarity)
        weighted_sum = K.sum(K.expand_dims(context_to_query_attention, axis=-1) * encoded_context, -2)
        expanded_weighted_sum = K.expand_dims(weighted_sum, 1)
        num_of_repeatations = K.shape(encoded_context)[1]
        return K.tile(expanded_weighted_sum, [1, num_of_repeatations, 1])

    def compute_output_shape(self, input_shape):
        similarity_matrix_shape, encoded_context_shape = input_shape
        return similarity_matrix_shape[:-1] + encoded_context_shape[-1:]

    def get_config(self):
        config = super().get_config()
        return config

from keras.engine.topology import Layer
from keras.layers.advanced_activations import Softmax
from keras import backend as K


class C2QAttention(Layer):

    def __init__(self, **kwargs):
        super(C2QAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        super(C2QAttention, self).build(input_shape)

    def call(self, inputs):
        similarity_matrix, encoded_question = inputs
        context_to_query_attention = Softmax(axis=-1)(similarity_matrix)
        encoded_question = K.expand_dims(encoded_question, axis=1)
        return K.sum(K.expand_dims(context_to_query_attention, axis=-1) * encoded_question, -2)

    def compute_output_shape(self, input_shape):
        similarity_matrix_shape, encoded_question_shape = input_shape
        return similarity_matrix_shape[:-1] + encoded_question_shape[-1:]

    def get_config(self):
        config = super().get_config()
        return config

from keras.engine.topology import Layer
from keras import backend as K


class MergedContext(Layer):

    def __init__(self, **kwargs):
        super(MergedContext, self).__init__(**kwargs)

    def build(self, input_shape):
        super(MergedContext, self).build(input_shape)

    def call(self, inputs):
        encoded_context, context_to_query_attention, query_to_context_attention = inputs
        element_wise_multiply1 = encoded_context * context_to_query_attention
        element_wise_multiply2 = encoded_context * query_to_context_attention
        concatenated_tensor = K.concatenate(
            [encoded_context, context_to_query_attention, element_wise_multiply1, element_wise_multiply2], axis=-1)
        return concatenated_tensor

    def compute_output_shape(self, input_shape):
        encoded_context_shape, _, _ = input_shape
        return encoded_context_shape[:-1] + (encoded_context_shape[-1] * 4, )

    def get_config(self):
        config = super().get_config()
        return config
