In [1]:
import pandas as pd
import numpy as np
import keras
from keras.utils.data_utils import get_file
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation,GlobalAveragePooling1D,Lambda,Bidirectional
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam, RMSprop
from keras import backend as K
from keras.layers.embeddings import Embedding

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the preprocessed arrays from disk.
# Paths are passed straight to np.load so NumPy opens *and closes* each file
# itself; wrapping them in a bare open(...) left four file handles dangling.
# NOTE(review): q1/q2 are presumably padded token-id sequences for the two
# questions of each pair, and labels the duplicate flag -- confirm against the
# preprocessing notebook.
q1_data = np.load('data/processed/q1_processed.npy')
q2_data = np.load('data/processed/q2_processed.npy')

labels = np.load('data/processed/label_processed.npy')
embedding_matrix = np.load('data/processed/glove_word_embedding_matrix.npy')

In [3]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `model_selection` is its replacement. Fall back to the old module only
# on very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split

# Stack the two question arrays along a new axis 1 so that a single split keeps
# each (q1, q2) pair together with its label.
X = np.stack((q1_data, q2_data), axis=1)
target = labels

# 75/25 split, stratified on the label, with a fixed seed for reproducibility.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    target,
    test_size=0.25,
    random_state=126,
    stratify=target,
)

# Un-stack: index 0 on axis 1 is question 1, index 1 is question 2.
Q1_train = X_train[:, 0]
Q2_train = X_train[:, 1]
Q1_val = X_val[:, 0]
Q2_val = X_val[:, 1]



In [4]:
def vec_distance(vects):
    """Return the squared Euclidean distance between two batched tensors.

    `vects` is a pair of tensors. Their element-wise difference is squared
    and summed along axis 1; the summed axis is kept, so the result has a
    single column per row.
    """
    left, right = vects
    squared_diff = K.square(left - right)
    return K.sum(squared_diff, axis=1, keepdims=True)
def vec_output_shape(shapes):
    """Return the output shape `(batch, 1)` for a pair of input shapes.

    `shapes` must contain exactly two shape tuples; the batch dimension is
    taken from the first one.
    """
    first_shape, _second_shape = shapes
    batch_dim = first_shape[0]
    return (batch_dim, 1)

In [15]:
# Frozen embedding layer initialised from the matrix loaded earlier.
nb_words = len(embedding_matrix)           # vocabulary size = number of rows in the matrix
# Read the vector width from the matrix itself rather than hard-coding 300, so
# the layer always matches whatever embedding file was loaded.
embedding_dim = embedding_matrix.shape[1]
max_sentence_len = 30                      # number of token ids per question fed to the model
embedding_layer = Embedding(
    nb_words,
    embedding_dim,
    weights=[embedding_matrix],
    input_length=max_sentence_len,
    trainable=False,  # keep the loaded vectors fixed during training
)

In [16]:
# --- Siamese encoder: both questions go through the SAME embedding + LSTM ---
lstm_layer = LSTM(units=128)

# Two integer-sequence inputs, one per question.
sequence_1_input = Input(shape=(max_sentence_len,), dtype='int32')
sequence_2_input = Input(shape=(max_sentence_len,), dtype='int32')

# Shared embedding lookup for each question.
embedded_sequences_1 = embedding_layer(sequence_1_input)
embedded_sequences_2 = embedding_layer(sequence_2_input)

# Shared LSTM encodes each question into a single vector.
x1 = lstm_layer(embedded_sequences_1)
y1 = lstm_layer(embedded_sequences_2)

# --- Comparison head ---
# Squared distance between the two encodings, one scalar per pair.
distance = Lambda(
    function=vec_distance,
    output_shape=vec_output_shape,
)([x1, y1])

dense1 = Dense(units=16, activation='sigmoid')(distance)
dense1 = Dropout(rate=0.3)(dense1)

bn2 = BatchNormalization()(dense1)
prediction = Dense(units=1, activation='sigmoid')(bn2)

model = Model(
    inputs=[sequence_1_input, sequence_2_input],
    outputs=prediction,
)

In [17]:
# Print the layer-by-layer summary (output shapes, parameter counts, wiring) of the model built above.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 30)           0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 30, 300)      25662600    input_5[0][0]                    
                                                                 input_6[0][0]                    
__________________________________________________________________________________________________
lstm_3 (LSTM)                   (None, 128)          219648      embedding_2[0][0]                
          

In [18]:
# Binary classification set-up: cross-entropy loss, Adam optimiser, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Stop training once validation loss has failed to improve for 3 epochs in a row.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
)

In [19]:
# Train for at most 10 epochs; the validation set feeds the early-stopping callback.
hist = model.fit(
    [Q1_train, Q2_train],
    y_train,
    batch_size=256,
    epochs=10,
    verbose=1,
    shuffle=True,
    class_weight=None,
    validation_data=([Q1_val, Q2_val], y_val),
    callbacks=[early_stopping],
)

Train on 303216 samples, validate on 101072 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
