# Siamese Network

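Keras model which encodes an abstract and a summary with a shared encoder and is trained to either map the two encodings close together or push them apart. Note: this prototype scores each pair with the dot product of the two encodings and trains with binary cross-entropy rather than a true contrastive loss.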

### Hyperparameters

In [74]:
# Sequence lengths (in tokens). All three are kept equal.
text_len = 5
abstract_len = text_len
summary_len = text_len

# Embedding settings: vector size per token and number of distinct token ids.
word_dim = 5
vocab_size = 10

# NOTE(review): not referenced by the other cells shown in this notebook;
# presumably leftover convolution settings -- TODO confirm before removing.
nb_filter = 2
filter_len = 1

### Model Sharing

In [98]:
from keras.layers import Input, Embedding, LSTM, merge
from keras.models import Model
from keras.layers.core import Activation

# Shared encoder: token ids -> embeddings -> fixed-size LSTM vector.
# The same sub-model (and therefore the same weights) is applied to both the
# abstract and the summary below, so its input length is `text_len`, matching
# the Embedding's `input_length` and the two inputs that are fed through it.
text = Input(shape=[text_len], dtype='int32')
embedded = Embedding(
    input_dim=vocab_size,
    output_dim=word_dim,
    input_length=text_len,
    mask_zero=True,  # token id 0 is reserved for padding and is masked out
    weights=None,
)(text)
vector = LSTM(output_dim=2)(embedded)
embed_text = Model(input=text, output=vector)

# Siamese model: encode each side with the shared encoder, then score the pair.
abstract = Input(shape=[text_len], dtype='int32')
summary = Input(shape=[text_len], dtype='int32')
abstract_vec = embed_text(abstract)
summary_vec = embed_text(summary)

# Per-example dot product of the two encodings.
# won't work without `dot_axes=1` (!!)
score = merge(inputs=[abstract_vec, summary_vec], mode='dot', dot_axes=1)

# NOTE(review): `score` is a raw, unbounded dot product (it can be negative),
# whereas binary cross-entropy is defined on probabilities in [0, 1].
# Consider squashing the score (e.g. with a sigmoid) or switching to a
# contrastive loss -- TODO confirm the intended objective.
model = Model(input=[abstract, summary], output=score)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_36 (InputLayer)            (None, 5)             0                                            
____________________________________________________________________________________________________
input_37 (InputLayer)            (None, 5)             0                                            
____________________________________________________________________________________________________
model_17 (Model)                 (None, 2)             114         input_36[0][0]                   
                                                                   input_37[0][0]                   
____________________________________________________________________________________________________
merge_14 (Merge)                 (None, 1)             0           model_17[1][0]          

### Train Model

In [99]:
import numpy as np
from keras.utils.np_utils import to_categorical

nb_train = 2

# Toy training set: `nb_train` random (abstract, summary) pairs, each a row of
# `text_len` token ids, with a random binary label per pair.
# Token ids are drawn from 1..vocab_size-1 because the Embedding layer is built
# with mask_zero=True: id 0 means "padding" there and would be silently masked
# instead of being treated as a word.
X_abstract = np.random.randint(1, vocab_size, size=(nb_train, text_len))
X_summary = np.random.randint(1, vocab_size, size=(nb_train, text_len))
y = np.random.choice([0, 1], size=nb_train)

model.fit([X_abstract, X_summary], y)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1481d3ed0>

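Sanity check the dot operation: fetch the two encoded vectors that feed the merge layer together with the merge layer's output, and confirm that each output row is the dot product of the corresponding rows of the two inputs.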

In [101]:
import keras.backend as K

# The last layer of `model` is the dot-product merge. Build a backend function
# that returns its two input tensors (the abstract and summary encodings)
# followed by its output, so the dot product can be checked by hand.
merge_layer = model.layers[-1]
f = K.function(
    inputs=model.inputs,
    outputs=merge_layer.input + [merge_layer.output],
)

f([X_abstract, X_summary])

[array([[ 0.00185674, -0.00755589],
        [ 0.00027848, -0.01449449]], dtype=float32),
 array([[ 0.00390792,  0.00459523],
        [-0.00211801,  0.00029689]], dtype=float32),
 array([[ -2.74650884e-05],
        [ -4.89312652e-06]], dtype=float32)]