### Deep relevance matching model

In [1]:
import os, sys, time
import numpy as np
from numpy.linalg import norm
import pandas as pd
from tqdm import tqdm
import cPickle as pk
np.random.seed(1) # to be reproducible

In [2]:
W2V_FPATH = '/local/XW/DATA/WORD_EMBEDDINGS/biomed-w2v-200.txt'
GLOVE_FPATH = '/local/XW/DATA/WORD_EMBEDDINGS/glove.6B.200d.txt'

In [3]:
word2vec = {} # maps word ---> embedding vector
with open(GLOVE_FPATH) as f:
    for line in tqdm(f, total=400000): #5443657
        vals = line.split()
        word = vals[0]
        vec = np.asarray(vals[1:], dtype='float')
#         vec /=  norm(vec)
        word2vec[word] = vec
print 'found %d word vectors.' % len(word2vec)

100%|██████████| 400000/400000 [00:13<00:00, 30271.32it/s]

found 400000 word vectors.





### prepare toy data

In [4]:
query = 'football game article'
documents = \
["about the bible quiz answers in article healta saturn wwc edu healta saturn wwc edu tammy r healy writes the cheribums are on the ark of the covenant when god said make no graven image he was refering to idols which were created to be worshipped the ark of the covenant wasn t wrodhipped and only the high priest could enter the holy of holies where it was kept once a year on the day of atonement i am not familiar with or knowledgeable about the original language but i believe there is a word for idol and that the translator would have used the word idol instead of graven image had the original said idol so i think you re wrong here but then again i could be too i just suggesting a way to determine whether the interpretation you offer is correct dean kaflowitz",
 "amusing atheists and agnostics in article timmbake mcl timmbake mcl ucsb edu clam bake timmons writes fallacy atheism is a faith lo i hear the faq beckoning once again wonderful rule deleted you re correct you didn t say anything about a conspiracy correction hard atheism is a faith yes rule don t mix apples with oranges how can you say that the extermination by the mongols was worse than stalin khan conquered people unsympathetic to his cause that was atrocious but stalin killed millions of his own people who loved and worshipped him and his atheist state how can anyone be worse than that i will not explain this to you again stalin did nothing in the name of atheism whethe he was or was not an atheist is irrelevant get a grip man the stalin example was brought up not as an indictment of atheism but merely as another example of how people will kill others under any name that s fit for the occasion no look again while you never said it the implication is pretty clear i m sorry but i can only respond to your words not your true meaning usenet is a slippery medium deleted wrt the burden of proof so hard atheism has nothing to prove then how does it justify that god does not exist i know there s the faq etc but guess what if those justifications were so compelling why aren t people flocking to hard atheism they re not and they won t i for one will discourage people from hard atheism by pointing out those very sources as reliable statements on hard atheism look i m not supporting any dogmatic position i d be a fool to say that in the large group of people that are atheists no people exist who wish to proselytize in the same fashion as religion how many hard atheists do you see posting here anyway maybe i mm just not looking hard enough second what makes you think i m defending any given religion i m merely recognizing hard atheism for what it is a faith i never meant to do so although i understand where you might get that idea i was merely using the bible example as an allegory to illustrate my point and yes by we i am referring to every reader of the post where is the evidence that the poster stated that he relied upon evidence for what who i think i may have lost this thread why theists are arrogant deleted because they say such and such is absolutely unalterably true because my dogma says it is true i am not prepared to issue blanket statements indicting all theists of arrogance as you are wont to do with atheists bzzt by virtue of your innocent little pronoun they you ve just issued a blanket statement at least i will apologize by qualifying my original statement with hard atheist in place of atheist would you call john the baptist arrogant who boasted of one greater than he that s what many christians do today how is that in itself arrogant guilty as charged what i meant to say was the theists who are arrogant are this way because they say other than that i thought my meaning was clear enough any position that claims itself as superior to another with no supporting evidence is arrogant thanks for your apology btw i m not worthy only seriously misinformed with your sophisticated put down of they the theists your serious misinformation shines through explained above bake timmons iii there s nothing higher stronger more wholesome and more useful in life than some good memory alyosha in brothers karamazov dostoevsky"]

In [5]:
def get_histvec(q_wd, doc):
    randvec = np.random.randn(200)
    qvec = word2vec.get(q_wd, randvec)
    dvecs = np.vstack( [word2vec.get(wd, randvec) for wd in doc.split()] )
    cossims = np.dot(dvecs, qvec) / norm(qvec) / norm(dvecs, axis=1)
    hist, _ = np.histogram( cossims[cossims<1.0], bins=29, range=(-1,1) )
    ones = len(cossims) - sum(hist)
    ret = np.array( list(hist) + [ones] )
    return ret # np.reshape(ret, (-1, 30))

In [6]:
word2idf = {'football': 0.7, 'game': 0.5, 'article': 0.3, 'hello': 0.1}
N = 3 # max query length

# input examples
hists = np.array([ get_histvec(wd, documents[0]) for wd in query.split()])
hists2 = np.array([ get_histvec(wd, documents[1]) for wd in query.split()])
idfs = np.array([word2idf.get(wd,0) for wd in query.split()])
idfs = idfs.reshape((-1,N))
print hists.shape, idfs.shape

(3, 30) (1, 3)


### construct the relevance IR model

In [7]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, InputLayer, Flatten, Input, Merge, merge, Reshape
import keras.backend as K

Using TensorFlow backend.


In [8]:
# 2 main components of the structure: feed forward network and gating
feed_forward = Sequential(
    [Dense(input_dim=30, output_dim=5, activation='tanh'),
     Dense(output_dim=1, activation='tanh')], 
    name='feed_forward_nw')

# inputs_hists = Input(shape=(N,30,))
# inputs_hist = Input(shape=(30,))
# hidden1 = Dense(5, activation='tanh')(inputs_hist)
# hidden2 = Dense(1, activation='tanh')(hidden1)

# model.compile()

# zs = feed_forward.predict( hists )
# print zs

In [9]:
# ***note: have to wrap ops into Lambda layers !!***
# cf: https://groups.google.com/forum/#!topic/keras-users/fbRS-FkZw_Q
from keras.layers.core import Lambda

input_idf = Input(shape=(N,), name='input_idf')

def scale(x): 
    w = K.variable(1e-10, name='w_g')
    return K.mul(x,w)
def scale_output_shape(input_shape): return input_shape

scaled = Lambda(scale, scale_output_shape)(input_idf)
gs_out = Activation('softmax')(scaled)
gating = Model(input=input_idf, output=gs_out, name='gating')
# gating = Sequential([InputLayer(input_shape=(N,)),
#                      Activation('softmax')], name='gating')

# print gating.predict(idfs)

In [10]:
input_hists = Input(shape=(N,30,), name='input_hists')

def slicei(x, i=0): return x[:,i,:]
def slicei_output_shape(input_shape): return (input_shape[0], input_shape[2])
zs = [feed_forward( Lambda(lambda x:slicei(x,i), slicei_output_shape)(input_hists) ) for i in xrange(N) ]

def concat(x): return K.reshape(K.concatenate(x), (-1,3))
def concat_output_shape(input_shape): return (input_shape[0], N)
zs = Lambda(concat, concat_output_shape)(zs)

input_idf = Input(shape=(N,), name='input_idf')
gs = gating(input_idf)

# print zs.get_shape(), gs.get_shape()

def innerprod(x): return K.sum( K.mul(x[0],x[1]), axis=1)
def innerprod_output_shape(input_shape): return (input_shape[0])
scores = Lambda(innerprod, innerprod_output_shape, name='elemul')([zs, gs])

# print scores.get_shape()

model = Model(input=[input_hists, input_idf], output=[scores])

In [11]:
model.predict( [hists.reshape((1,3,30)), idfs] )

array([ 0.75311828], dtype=float32)

In [12]:
input_hists_pos = Input(shape=(N,30,), name='input_hists_pos')
input_hists_neg = Input(shape=(N,30,), name='input_hists_neg')

s_pos = model([input_hists_pos,input_idf])
s_neg = model([input_hists_neg, input_idf])

def diff(x): return x[0]-x[1]
def diff_output_shape(input_shape): return input_shape
diff = Lambda(diff, diff_output_shape)([s_pos,s_neg])

In [13]:
mmodel = Model(input=[input_idf, input_hists_pos,  input_hists_neg], output=[diff])

In [14]:
print mmodel.predict([idfs, hists.reshape((1,3,30)), hists2.reshape((1,3,30))])
print mmodel.predict([idfs, hists2.reshape((1,3,30)), hists.reshape((1,3,30))])

[-0.3206169]
[ 0.3206169]


In [15]:
# define my loss function: hinge of score_pos - score_neg
def pairwise_hinge(y_true, y_pred): # y_pred = score_pos - score_neg, **y_true doesn't matter here**
    return K.mean( K.maximum(1. - y_pred, y_true*0.0) )  

mmodel.compile(optimizer='adagrad', loss=pairwise_hinge)

In [16]:
mmodel.fit( [idfs, hists.reshape(1,3,30), hists2.reshape(1,3,30)], np.array([1.0]) )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fce6c03d4d0>

In [17]:
# inputs_hists = Input(shape=(N,30,))
# print inputs_hists.get_shape()
# zs = [feed_forward(inputs_hists[:,i,:]) for i in xrange(N)]
# zs = K.concatenate(zs, axis=1)
# print zs.get_shape()

# gs = gating(inputs_idf)
# print gs.get_shape()
# # print K.dot(gs, K.transpose(zs)).get_shape()
# # scores = K.dot(gs, K.transpose(zs))

# elemmul = K.mul(gs, zs)
# print elemmul.get_shape()
# # import tensorflow as tf 

# # tf.reduce_sum(elemmul, 1)
# scores = K.sum(elemmul,axis=1)
# scores = K.reshape(scores, [-1,1])
# print scores.get_shape()


# mmodel.fit( [idfs, hists.reshape(1,3,30), hists2.reshape(1,3,30)], np.array([1.0]) )
# Epoch 1/10
# 1/1 [==============================] - 0s - loss: 1.3206
# Epoch 2/10
# 1/1 [==============================] - 0s - loss: 0.9313
# Epoch 3/10
# 1/1 [==============================] - 0s - loss: 0.5628
# Epoch 4/10
# 1/1 [==============================] - 0s - loss: 0.5417
# Epoch 5/10
# 1/1 [==============================] - 0s - loss: 0.5298
# Epoch 6/10
# 1/1 [==============================] - 0s - loss: 0.4126
# Epoch 7/10
# 1/1 [==============================] - 0s - loss: 0.3700
# Epoch 8/10
# 1/1 [==============================] - 0s - loss: 0.3377
# Epoch 9/10
# 1/1 [==============================] - 0s - loss: 0.3101
# Epoch 10/10
# 1/1 [==============================] - 0s - loss: 0.2855
