In [17]:
import numpy as np

In [18]:
def position_encoding(sentence_size, embedding_size):
    """Build the position-encoding weight matrix for one sentence.

    With ``S = sentence_size`` and ``E = embedding_size``, the entry for
    word position ``j`` (1-based) and embedding dimension ``i`` (1-based) is

        1 + 4 * (i - E / 2) * (j - S / 2) / (E * S)

    Args:
        sentence_size: number of word positions in a sentence.
        embedding_size: number of embedding dimensions.

    Returns:
        A float32 array of shape ``(sentence_size, embedding_size)``;
        row ``j - 1`` holds the weights for word position ``j``.
    """
    emb_mid = embedding_size / 2
    sent_mid = sentence_size / 2

    # Centred 1-based indices along each axis.
    emb_offsets = [k - emb_mid for k in range(1, embedding_size + 1)]
    sent_offsets = [t - sent_mid for t in range(1, sentence_size + 1)]

    # Outer product gives every (dimension, position) pair at once,
    # laid out as (embedding_size, sentence_size).
    weights = np.outer(emb_offsets, sent_offsets).astype(np.float32)
    weights = 1 + 4 * weights / embedding_size / sentence_size

    return weights.T

# memn2n for CBT (1 hop)

In [19]:
import tensorflow as tf
from tensorflow.contrib.layers import xavier_initializer as xinit

## Parameters

In [20]:
L = 20  # tokens per sentence (questions and story sentences share this length)
N = 10  # story sentences per example
vocab_size = 5000  # rows of each embedding table and width of the answer logits
emb_dim = 150  # size of every embedding vector

## Graph

In [21]:
# Start from an empty default graph so that re-running the cells below in the
# same kernel does not collide with variables/ops created by an earlier run.
tf.reset_default_graph()

## Placeholders

In [22]:
# Integer inputs fed at run time; the leading None dimension is the batch size.
# questions: L token ids per example
questions = tf.placeholder(tf.int32, [None, L], name='questions')
# stories: N sentences of L token ids per example
stories = tf.placeholder(tf.int32, [None, N, L], name='stories')
# answers: one integer label per example (consumed by the sparse cross-entropy)
answers = tf.placeholder(tf.int32, [None], name='answers')

## Embedding Matrices

In [23]:
# Three separate [vocab_size, emb_dim] embedding tables, Xavier-initialised:
#   A - embeds the stories for the attention scores
#   B - embeds the question
#   C - embeds the stories for the memory output
A, B, C = [
    tf.get_variable(table, shape=[vocab_size, emb_dim], dtype=tf.float32,
                    initializer=xinit())
    for table in ('A', 'B', 'C')
]

In [24]:
# Constant float32 matrix of shape (L, emb_dim); it is multiplied into the word
# embeddings before they are summed over the word axis.
pos_enc = position_encoding(L, emb_dim)

## Embed Question

In [30]:
# Look up every question token in B -> (batch, L, emb_dim).
question_emb = tf.nn.embedding_lookup(B, questions)
# Weight each word by its position encoding and sum over the word axis
# -> one vector per question, (batch, emb_dim).
u0 = tf.reduce_sum(question_emb * pos_enc, axis=1)

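- Accumulate `u`: collect the question states in a list, starting with `u0`; the most recent state is read back as `u[-1]`.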

In [32]:
# Keep the question states in a list so the most recent one is always u[-1];
# it starts out holding only the embedded question u0.
u = [u0]

## Embed Story

In [36]:
# Look up every story token in A -> (batch, N, L, emb_dim).
story_emb_a = tf.nn.embedding_lookup(A, stories)
# Position-weight the words and sum over the word axis -> one vector per
# story sentence, (batch, N, emb_dim).
m = tf.reduce_sum(story_emb_a * pos_enc, axis=2)
# Idea to try: mean() instead of sum over the word axis.

## Attention Probability

In [46]:
# Attention over the story sentences.
# m is (batch, N, emb_dim) and the latest question state u[-1] is
# (batch, emb_dim). Insert a sentence axis -> (batch, 1, emb_dim) so each
# example's question is compared with that example's own N sentence vectors.
# (Multiplying by the list `u` itself would pack it to (1, batch, emb_dim) and
# broadcast the batch axis against the sentence axis.)
# The dot products over emb_dim give (batch, N); softmax normalises over N.
probs = tf.nn.softmax(tf.reduce_sum(m * tf.expand_dims(u[-1], 1), axis=2))

In [47]:
# Shape check: one attention weight per story sentence, i.e. (?, N).
probs

<tf.Tensor 'Softmax_1:0' shape=(?, 10) dtype=float32>

## Calculate 'o'

In [53]:
# Output-side sentence vectors: embed the stories with C, position-weight the
# words and sum over the word axis -> (batch, N, emb_dim).
c = tf.nn.embedding_lookup(C, stories)
c = tf.reduce_sum(c * pos_enc, axis=2)
# probs is (batch, N). Give it a trailing axis -> (batch, N, 1) so that every
# sentence vector is scaled by its own attention weight, then sum over the
# sentence axis -> (batch, emb_dim).
# (Multiplying a (batch, emb_dim, N) tensor by the rank-2 probs would broadcast
# probs as (1, batch, N) and line the batch axis up with emb_dim.)
o = tf.reduce_sum(c * tf.expand_dims(probs, -1), axis=1)

In [55]:
# Shape check: one emb_dim-sized output vector per example, i.e. (?, emb_dim).
o

<tf.Tensor 'Sum_7:0' shape=(?, 150) dtype=float32>

## Output Representation

In [56]:
# Shape check of the latest question state before it is combined with o.
u[-1]

<tf.Tensor 'Sum:0' shape=(?, 150) dtype=float32>

In [57]:
# Shape check of the memory output; it must match u[-1] for the sum below.
o

<tf.Tensor 'Sum_7:0' shape=(?, 150) dtype=float32>

In [59]:
# Square [emb_dim, emb_dim] matrix, Xavier-initialised; it is applied to the
# question state before the memory output is added.
H = tf.get_variable(
    'H',
    shape=[emb_dim, emb_dim],
    dtype=tf.float32,
    initializer=xinit(),
)

In [60]:
# Map the latest question state through H, then add the memory output o.
u_final = tf.add(tf.matmul(u[-1], H), o)

## Answer

In [61]:
# [emb_dim, vocab_size] output matrix, Xavier-initialised; it turns the final
# state into one score per vocabulary entry.
W = tf.get_variable(
    'W',
    shape=[emb_dim, vocab_size],
    dtype=tf.float32,
    initializer=xinit(),
)

In [62]:
# Unnormalised scores over the vocabulary, one row per example.
logits = tf.matmul(u_final, W)
# Softmax of the scores; the loss below is computed from `logits`, not from `a`.
a = tf.nn.softmax(logits)

## Optimization

In [66]:
# Per-example cross-entropy between the vocabulary scores and the integer
# answer labels (softmax is applied inside the op, so raw logits go in).
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=answers, logits=logits)

# Scalar training objective: mean over the batch.
loss = tf.reduce_mean(cross_entropy)

# Adagrad with a learning rate of 0.01.
optimizer = tf.train.AdagradOptimizer(0.01)
train_op = optimizer.minimize(loss)