In [20]:
import numpy as np
import pandas as pd

import keras
from keras.utils.data_utils import get_file
from keras.layers import Dense, Input, LSTM,Flatten,Reshape, Embedding, Dropout, Activation,GlobalAveragePooling1D,Lambda,Bidirectional
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam, RMSprop
from keras import backend as K
from keras.layers.embeddings import Embedding

# Load Data

In [7]:
# Load the pre-processed arrays written by the preprocessing step.
# Paths are handed straight to np.load so NumPy opens *and closes* each file
# itself; the previous np.load(open(...)) form left every handle open.
q1_data = np.load('data/processed/q1_processed.npy')
q2_data = np.load('data/processed/q2_processed.npy')

labels = np.load('data/processed/label_processed.npy')
# Matrix used below to initialise the (frozen) Embedding layers.
embedding_matrix = np.load('data/processed/glove_word_embedding_matrix.npy')

In [8]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; prefer model_selection and fall back only on old installations.
# NOTE(review): the two modules may not yield the exact same row assignment
# for a given random_state -- confirm if bit-for-bit reproducibility matters.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Stack both questions along a new axis 1 so that a single split keeps each
# question pair (and its label) together.
X = np.stack((q1_data, q2_data), axis=1)
target = labels

# 75/25 split, stratified on the label so both parts keep the class balance.
X_train, X_val, y_train, y_val = train_test_split(
    X, target, test_size=0.25, random_state=126, stratify=target)

# Undo the stacking: index 0 on axis 1 is question 1, index 1 is question 2.
Q1_train = X_train[:, 0]
Q2_train = X_train[:, 1]
Q1_val = X_val[:, 0]
Q2_val = X_val[:, 1]

# Single-argument print(...) calls behave the same on Python 2 and 3 and match
# the print style used by the later cells.
print('Q1_train shape {}'.format(Q1_train.shape))
print('Q2_train shape {}'.format(Q2_train.shape))
print('y_train shape {}'.format(y_train.shape))

print('Q1_val shape {}'.format(Q1_val.shape))
print('Q2_val shape {}'.format(Q2_val.shape))
print('y_val shape {}'.format(y_val.shape))



Q1_train shape (303216, 30)
Q2_train shape (303216, 30)
y_train shape (303216,)
Q1_val shape (101072, 30)
Q2_val shape (101072, 30)
y_val shape (101072,)


In [24]:
# --- Model dimensions --------------------------------------------------------
seq_maxlen = 30     # input_length given to the Embedding layers
EMBED_DIM = 300     # output width of the Embedding layers
HIDDEN_DIM = 50     # number of units in each LSTM encoder

# --- Training schedule -------------------------------------------------------
NBR_EPOCHS = 10
BATCH_SIZE = 256

# Row / column counts of the embedding matrix; m is passed as the Embedding
# input dimension when the model is built.
# NOTE(review): n is presumably expected to equal EMBED_DIM -- confirm.
m, n = embedding_matrix.shape


In [28]:

# Sequential and Merge are used below but were never imported anywhere in the
# notebook, so this cell raised NameError on a fresh kernel.
# NOTE(review): Merge is a legacy layer that newer Keras releases no longer
# ship -- this cell presumably needs the Keras version the notebook was
# written against; confirm before upgrading.
from keras.models import Sequential
from keras.layers import Merge


def _build_question_encoder():
    """Return one question encoder: frozen Embedding -> LSTM -> Dropout.

    The Embedding layer is initialised from ``embedding_matrix`` and kept
    non-trainable. The LSTM uses ``return_sequences=True``, so the encoder
    emits one HIDDEN_DIM-wide vector per time step.
    """
    encoder = Sequential()
    encoder.add(Embedding(m, EMBED_DIM,
                          weights=[embedding_matrix],
                          input_length=seq_maxlen, trainable=False))
    encoder.add(LSTM(HIDDEN_DIM, return_sequences=True))
    encoder.add(Dropout(0.3))
    return encoder


# Two independent encoders (no weight sharing), one per question.
ques1_enc = _build_question_encoder()
ques2_enc = _build_question_encoder()

# Attention branch: dot product of the two encodings over axis 1, then a dense
# projection reshaped to (seq_maxlen, HIDDEN_DIM) so it can be summed with the
# question-1 encoding below.
attn = Sequential()
attn.add(Merge([ques1_enc, ques2_enc], mode="dot", dot_axes=[1, 1]))
attn.add(Flatten())
attn.add(Dense((seq_maxlen * HIDDEN_DIM)))
attn.add(Reshape((seq_maxlen, HIDDEN_DIM)))

# Classifier: question-1 encoding + attention output -> flatten -> sigmoid.
model = Sequential()
model.add(Merge([ques1_enc, attn], mode="sum"))
model.add(Flatten())
model.add(Dense(1, activation="sigmoid"))

model.compile(optimizer="adam", loss="binary_crossentropy",
              metrics=["accuracy"])
# Stop training once val_loss has failed to improve for 3 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
model.summary()




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
merge_19 (Merge)             (None, 30, 50)            0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 1500)              0         
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 1501      
Total params: 55,218,601
Trainable params: 3,893,401
Non-trainable params: 51,325,200
_________________________________________________________________




In [29]:
print("Training...")
# Validate each epoch on the explicit hold-out split (an earlier variant of
# this cell used validation_split=0.1 instead); early_stopping watches the
# resulting val_loss.
hist = model.fit(
    [Q1_train, Q2_train],
    y_train,
    batch_size=BATCH_SIZE,
    epochs=NBR_EPOCHS,
    verbose=1,
    shuffle=True,
    validation_data=([Q1_val, Q2_val], y_val),
    callbacks=[early_stopping])


Training...
Train on 303216 samples, validate on 101072 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [30]:
# Score the final model on the hold-out split.
# NOTE(review): the message says "Test", but the data is the same validation
# split that was passed to fit() as validation_data -- consider a separate
# test set or relabelling the message.
eval_inputs = [Q1_val, Q2_val]
loss, acc = model.evaluate(eval_inputs, y_val, batch_size=BATCH_SIZE)
print("Test loss/accuracy final model = %.4f, %.4f" % (loss, acc))

Test loss/accuracy final model = 0.4226, 0.8066


In [32]:
import os

# Directory that receives both the weights and the architecture description.
model_dir = "data/model"
# Create it up front so a missing directory cannot make the save fail after a
# full training run (os.makedirs(exist_ok=...) is avoided for Python 2).
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

model.save_weights(os.path.join(model_dir, "lstm_attention.hdf5"))
# model.to_json() returns text, so the file is opened in text mode; the
# previous "wb" mode raises TypeError on Python 3 when a str is written.
with open(os.path.join(model_dir, "lstm_attention.json"), "w") as fjson:
    fjson.write(model.to_json())