In [14]:
import numpy as np

In [15]:
def position_encoding(sentence_size, embedding_size):
    """Build the position-encoding weights applied to word embeddings.

    For 1-based word position ``j`` and 1-based embedding dimension ``i``,
    entry ``[j - 1, i - 1]`` of the result is::

        1 + 4 * (i - embedding_size / 2) * (j - sentence_size / 2)
              / embedding_size / sentence_size

    Args:
        sentence_size: number of word positions (rows of the result).
        embedding_size: number of embedding dimensions (columns of the result).

    Returns:
        A float32 ``numpy.ndarray`` of shape ``(sentence_size, embedding_size)``.
    """
    # 1-based indices shifted by half the respective size.
    dim_offsets = np.arange(1, embedding_size + 1) - embedding_size / 2
    pos_offsets = np.arange(1, sentence_size + 1) - sentence_size / 2

    # The outer product yields every (dimension, position) pair at once,
    # replacing the element-by-element Python double loop. The cast to float32
    # happens before scaling, so the remaining arithmetic stays in float32.
    encoding = np.outer(dim_offsets, pos_offsets).astype(np.float32)
    encoding = 1 + 4 * encoding / embedding_size / sentence_size

    # Rows index word positions, columns index embedding dimensions.
    return np.transpose(encoding)

# MemN2N (End-to-End Memory Network) for CBT (3 hops)

In [16]:
import tensorflow as tf
from tensorflow.contrib.layers import xavier_initializer as xinit

## Parameters

In [17]:
# Vocabulary / embedding sizes.
vocab_size = 5000  # number of rows in every embedding matrix
emb_dim = 150      # width of every embedding vector

# Input geometry.
L = 20             # words per sentence (last axis of `questions` and `stories`)
N = 10             # sentences (memory slots) per story

# Number of passes through the memory loop.
hops = 3

## Graph

In [18]:
# Clear the default graph so the cells below can be re-run from here without
# clashing with variables created by an earlier execution of this notebook.
tf.reset_default_graph()

## Placeholders

In [19]:
# Integer word-id inputs fed at run time; the leading `None` axis leaves the
# batch size open.
questions = tf.placeholder(dtype=tf.int32, shape=(None, L), name='questions')
stories = tf.placeholder(dtype=tf.int32, shape=(None, N, L), name='stories')
# One integer label per example, consumed as sparse labels by the loss.
answers = tf.placeholder(dtype=tf.int32, shape=(None,), name='answers')

## Embedding Matrices

In [20]:
# Three separate [vocab_size, emb_dim] embedding tables, created in the order
# A, B, C, each with its own Xavier initializer:
#   A - looked up for the story memories `m`
#   B - looked up for the question
#   C - looked up for the story outputs `c`
A, B, C = (
    tf.get_variable(table_name, shape=[vocab_size, emb_dim],
                    dtype=tf.float32, initializer=xinit())
    for table_name in ('A', 'B', 'C')
)

In [21]:
# Position-encoding weights of shape (L, emb_dim); they are broadcast against
# the looked-up word embeddings before those are summed over the word axis.
pos_enc = position_encoding(L, emb_dim)

## Embed Question

In [22]:
# Look up every question word in B, weight each word vector by its position
# encoding, then sum over the word axis (axis 1): one vector per question.
u0 = tf.reduce_sum(tf.nn.embedding_lookup(B, questions) * pos_enc, axis=1)

- Accumulate the state `u` after every hop in a list, starting with the question embedding `u0`.

In [23]:
# History of states: u[0] is the question embedding, and the memory loop
# reads u[-1] and appends one new state per hop.
u = [u0]

## Define weights H, TA

In [24]:
# H  - square matrix applied to the previous state inside the memory loop.
# TA - one learned vector per memory slot, added to the A-embedded memories.
H = tf.get_variable('H', shape=[emb_dim, emb_dim], dtype=tf.float32,
                    initializer=xinit())
TA = tf.get_variable('TA', shape=[N, emb_dim], dtype=tf.float32,
                     initializer=xinit())

## Embed Story

In [25]:
# Input memories: embed the story words with A, weight them by position,
# sum over the word axis (axis 2) and add the per-slot vectors TA.
# Result: [batch, N, emb_dim].
# NOTE (original author): try mean() instead of the sum.
m = tf.reduce_sum(tf.nn.embedding_lookup(A, stories) * pos_enc, axis=2) + TA

# Output memories: same recipe with C (no per-slot term), then swap the last
# two axes. Result: [batch, emb_dim, N].
c = tf.transpose(
    tf.reduce_sum(tf.nn.embedding_lookup(C, stories) * pos_enc, axis=2),
    perm=[0, 2, 1],
)

## Memory Loop

In [26]:
# Run `hops` passes over the memory. Each pass scores every memory slot against
# the current state, reads a weighted sum of the output memories, and appends
# the updated state to `u`.
for k in range(hops):
    # m is [batch, N, emb_dim] and u[-1] is [batch, emb_dim]. Multiplying them
    # directly aligns trailing axes, which would pair the *batch* axis of the
    # state with the *memory* axis of m (only valid for batch size 1). Insert
    # a singleton memory axis so each example's state is matched against its
    # own N memories.
    p = tf.nn.softmax(
        tf.reduce_sum(m * tf.expand_dims(u[-1], axis=1), axis=2)
    )  # [batch, N] attention weights over the memory slots

    # c is [batch, emb_dim, N] and p is [batch, N]; the same alignment issue
    # applies, so give p a singleton embedding axis before weighting.
    o = tf.reduce_sum(c * tf.expand_dims(p, axis=1), axis=-1)  # [batch, emb_dim]

    # Next state: linear map of the previous state plus the memory read-out.
    u_k = tf.matmul(u[-1], H) + o
    u.append(u_k)

## Answer

In [27]:
# Output projection: maps an emb_dim-wide state to one score per vocabulary word.
W = tf.get_variable('W', shape=[emb_dim, vocab_size], dtype=tf.float32,
                    initializer=xinit())

In [28]:
# Unnormalized vocabulary scores computed from the state after the last hop.
logits = tf.matmul(u[-1], W)
# Predicted distribution over the vocabulary (the loss is computed from
# `logits`, not from `a`).
a = tf.nn.softmax(logits)

## Optimization

In [29]:
# Per-example cross-entropy between the vocabulary logits and the integer
# answer ids.
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=answers, logits=logits)

# Scalar training objective: mean cross-entropy over the batch.
loss = tf.reduce_mean(cross_entropy)

# Adagrad update step minimizing the loss.
optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
train_op = optimizer.minimize(loss)