In [1]:
from time import time
import pandas as pd
from sklearn.model_selection import train_test_split
import keras
from gensim.models import KeyedVectors
from keras.models import Model, load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Input, Layer, Embedding, LSTM, Dense, Flatten, Activation, RepeatVector, Permute, Lambda, \
Bidirectional, TimeDistributed, Dropout, Conv1D, GlobalMaxPool1D
from keras.layers.merge import multiply, concatenate
import keras.backend as K
from util import make_w2v_embeddings, split_and_zero_padding

In [2]:
import numpy as np
embeddings = np.load('./embeddings.npy')
len(embeddings)

11715

In [3]:
batch_size = 1024
n_epoch = 50
n_hidden = 50
embedding_dim = 300
max_seq_length = 10

In [4]:
class ManDist(Layer):

    def __init__(self, **kwargs):
        self.result = None
        super(ManDist, self).__init__(**kwargs)

    def build(self, input_shape):
        super(ManDist, self).build(input_shape)

    def call(self, x, **kwargs):
        self.result = K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True))
        return self.result

    def compute_output_shape(self, input_shape):
        return K.int_shape(self.result)

In [5]:
def shared_model(_input):
    len_embeddings = 11715
    embedded = Embedding(len_embeddings, embedding_dim, weights=[embeddings], input_shape=(max_seq_length,), \
                         trainable=False)(_input)

    # Bi-LSTM
    activations = Bidirectional(LSTM(n_hidden, return_sequences=True), merge_mode='concat')(embedded)
    activations = Bidirectional(LSTM(n_hidden, return_sequences=True), merge_mode='concat')(activations)

    # dropout
    activations = Dropout(0.5)(activations)

    # Attention
    attention = TimeDistributed(Dense(1, activation='tanh'))(activations)
    attention = Flatten()(attention)
    attention = Activation('softmax')(attention)
    attention = RepeatVector(n_hidden * 2)(attention)
    attention = Permute([2, 1])(attention)
    sent_representation = multiply([activations, attention])
    sent_representation = Lambda(lambda x_lambda: K.sum(x_lambda, axis=1))(sent_representation)

    # DropOut
    sent_representation = Dropout(0.1)(sent_representation)

    return sent_representation

In [6]:
def create_model():
    left_input = Input(shape=(max_seq_length,), dtype='float32')
    right_input = Input(shape=(max_seq_length,), dtype='float32')
    left_sen_representation = shared_model(left_input)
    right_sen_representation = shared_model(right_input)


    man_distance = ManDist()([left_sen_representation, right_sen_representation])
    sen_representation = concatenate([left_sen_representation, right_sen_representation, man_distance])
    similarity = Dense(1, activation='sigmoid')(Dense(2)(Dense(4)(Dense(16)(sen_representation))))
    model = Model(inputs=[left_input, right_input], outputs=[similarity])
    
    return model

In [15]:
def load_model():
    model = create_model()
    model.load_weights('./data/SiameseLSTM.h5')
    return model

In [16]:
model = load_model()

In [17]:
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 10)]         0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            [(None, 10)]         0                                            
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 10, 300)      3514500     input_7[0][0]                    
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, 10, 300)      3514500     input_8[0][0]                    
____________________________________________________________________________________________

In [21]:
model.predict(["hey there", "lol"])



AssertionError: in user code:

    /home/arghyadeep99/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1147 predict_function  *
        outputs = self.distribute_strategy.run(
    /home/arghyadeep99/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/arghyadeep99/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/arghyadeep99/.local/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/arghyadeep99/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1122 predict_step  **
        return self(x, training=False)
    /home/arghyadeep99/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:927 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/arghyadeep99/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:717 call
        return self._run_internal_graph(
    /home/arghyadeep99/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:899 _run_internal_graph
        assert str(id(x)) in tensor_dict, 'Could not compute output ' + str(x)

    AssertionError: Could not compute output Tensor("dense_20/Identity:0", shape=(None, 1), dtype=float32)
