**Dual LSTM Encoder for Dialog Response Generation**

http://www.wildml.com/2016/07/deep-learning-for-chatbots-2-retrieval-based-model-tensorflow/

https://github.com/dennybritz/chatbot-retrieval

https://github.com/rkadlec/ubuntu-ranking-dataset-creator

https://arxiv.org/abs/1506.08909

In [1]:
import tensorflow as tf
tf.VERSION

'1.2.0'

## 1. Word Embedding

In [2]:
# Fresh graph for this section. A bare `graph.as_default()` call only creates
# a context manager and has no effect unless it is entered with `with`, so it
# is dropped here. `tf.InteractiveSession(graph=graph)` installs both the
# graph and itself as defaults, which is what lets the `.eval()` calls in the
# following cells work without an explicit session argument.
graph = tf.Graph()
session = tf.InteractiveSession(graph=graph)
session

<tensorflow.python.client.session.InteractiveSession at 0x7f3aa8ded550>

In [3]:
# Toy vocabulary: 4 words, each represented by a 2-dimensional vector.
vocab_size = 4
embed_size = 2

# Hand-written word vectors; word_i is the embedding of word id i.
word_0 = [0, 0]
word_1 = [1, 0]
word_2 = [0, 1]
word_3 = [1, 1]

# Stack the vectors into a (vocab_size, embed_size) table whose row i is the
# embedding of word id i.
embeddings = tf.stack([word_0, word_1, word_2, word_3])

print('Embeddings:\n')
print(embeddings)
embeddings.eval()

Embeddings:

Tensor("stack:0", shape=(4, 2), dtype=int32)


array([[0, 0],
       [1, 0],
       [0, 1],
       [1, 1]], dtype=int32)

In [4]:
batch_size = 2
sentence_length = 3

sentence_0 = [0, 3, 2]
sentence_1 = [3, 1, 0]

input_data = tf.stack([sentence_0, sentence_1])

print('Sentences:\n')
print(input_data)
input_data.eval()

Sentences:

Tensor("stack_1:0", shape=(2, 3), dtype=int32)


array([[0, 3, 2],
       [3, 1, 0]], dtype=int32)

In [5]:
# Replace every word id in input_data with its row of the embedding table,
# turning the (2, 3) id matrix into a (2, 3, 2) tensor of word vectors.
input_embed = tf.nn.embedding_lookup(embeddings, input_data)

print('Input:\n')
print(input_embed)
input_embed.eval()

Input:

Tensor("embedding_lookup:0", shape=(2, 3, 2), dtype=int32)


array([[[0, 0],
        [1, 1],
        [0, 1]],

       [[1, 1],
        [1, 0],
        [0, 0]]], dtype=int32)

In [6]:
session.close()
del graph

## 2. Pairing

Similarity between **`c`** and **`c'`**, where **`c' = Mr`**.

**`c`** -> encoded context vector

**`r`** -> encoded response vector

**`M`** -> matrix that translates a response vector into the context space

In [7]:
# Fresh graph for the pairing examples. The bare `graph.as_default()` call was
# a no-op (its context manager was never entered), so it is omitted;
# `tf.InteractiveSession(graph=graph)` already makes the graph and the session
# the defaults used by the `.eval()` calls below.
graph = tf.Graph()
session = tf.InteractiveSession(graph=graph)
session

<tensorflow.python.client.session.InteractiveSession at 0x7f3aa8deda20>

In [8]:
M = tf.constant([[1, 2], [3, 4]])

print('M', M.shape, '\n')
print(M.eval(), '\n')

M (2, 2) 

[[1 2]
 [3 4]] 



In [9]:
# One (context, response) pair, each written as a (2, 1) column vector.
c = tf.constant([[1], [2]])
r = tf.constant([[3], [4]])

# Score = c^T * M * r. transpose_a=True turns c into a (1, 2) row vector
# before the first product, so the final result is a (1, 1) matrix.
ct_M = tf.matmul(c, M, transpose_a=True)
ct_M_r = tf.matmul(ct_M, r)

print('c', c.shape, '\n')
print(c.eval(), '\n')
print('r', r.shape, '\n')
print(r.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

c (2, 1) 

[[1]
 [2]] 

r (2, 1) 

[[3]
 [4]] 

ct * M (1, 2) 

[[ 7 10]] 

ct * M * r (1, 1) 

[[61]] 



In [10]:
# The same pair written as row vectors (first row); the second, all-zero row
# stands in for a second batch entry.
ct = tf.constant([[1, 2], [0, 0]])
rt = tf.constant([[3, 4], [0, 0]])

# With one example per row, c^T * M is simply ct @ M. transpose_b=True turns
# the rows of rt back into columns, so the product is a (2, 2) matrix holding
# the score of every context row against every response row.
ct_M = tf.matmul(ct, M)
ct_M_r = tf.matmul(ct_M, rt, transpose_b=True)

print('ct', ct.shape, '\n')
print(ct.eval(), '\n')
print('rt', rt.shape, '\n')
print(rt.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

ct (2, 2) 

[[1 2]
 [0 0]] 

rt (2, 2) 

[[3 4]
 [0 0]] 

ct * M (2, 2) 

[[ 7 10]
 [ 0  0]] 

ct * M * r (2, 2) 

[[61  0]
 [ 0  0]] 



In [11]:
c = tf.constant([[5], [6]])
r = tf.constant([[7], [8]])

ct_M = tf.matmul(c, M, transpose_a=True)
ct_M_r = tf.matmul(ct_M, r)

print('c', c.shape, '\n')
print(c.eval(), '\n')
print('r', r.shape, '\n')
print(r.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

c (2, 1) 

[[5]
 [6]] 

r (2, 1) 

[[7]
 [8]] 

ct * M (1, 2) 

[[23 34]] 

ct * M * r (1, 1) 

[[433]] 



In [12]:
# Both example pairs from the cells above, batched one per row.
ct = tf.constant([[1, 2], [5, 6]])
rt = tf.constant([[3, 4], [7, 8]])

# The full matrix product scores every context against every response.
# Only the diagonal entries belong to the actual (context, response) pairs;
# the off-diagonal entries are cross-pair scores.
ct_M = tf.matmul(ct, M)
ct_M_r = tf.matmul(ct_M, rt, transpose_b=True)

print('ct', ct.shape, '\n')
print(ct.eval(), '\n')
print('rt', rt.shape, '\n')
print(rt.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

ct (2, 2) 

[[1 2]
 [5 6]] 

rt (2, 2) 

[[3 4]
 [7 8]] 

ct * M (2, 2) 

[[ 7 10]
 [23 34]] 

ct * M * r (2, 2) 

[[ 61 129]
 [205 433]] 



In [13]:
# Two (context, response) pairs, one per row.
ct = tf.constant([[1, 2], [5, 6]])
rt = tf.constant([[3, 4], [7, 8]])

# c^T * M for every row at once.
ct_M = tf.matmul(ct, M)

# Add a trailing axis so each row becomes its own (2, 1) column matrix:
# (2, 2) -> (2, 2, 1).
batch_ct_M = tf.expand_dims(ct_M, axis=2)
batch_rt = tf.expand_dims(rt, axis=2)

# Batched matmul multiplies the matrices example by example:
# (1, 2) x (2, 1) per pair, giving (2, 1, 1). This yields only the per-pair
# scores instead of the full all-pairs matrix.
batch_ct_M_r = tf.matmul(batch_ct_M, batch_rt, transpose_a=True)

# Drop the trailing singleton axis: (2, 1, 1) -> (2, 1).
ct_M_r = tf.squeeze(batch_ct_M_r, axis=2)

print('ct', ct.shape, '\n')
print(ct.eval(), '\n')
print('rt', rt.shape, '\n')
print(rt.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M (batch)', batch_ct_M.shape, '\n')
print(batch_ct_M.eval(), '\n')
print('rt (batch)', batch_rt.shape, '\n')
print(batch_rt.eval(), '\n')
print('ct * M * r (batch)', batch_ct_M_r.shape, '\n')
print(batch_ct_M_r.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

ct (2, 2) 

[[1 2]
 [5 6]] 

rt (2, 2) 

[[3 4]
 [7 8]] 

ct * M (2, 2) 

[[ 7 10]
 [23 34]] 

ct * M (batch) (2, 2, 1) 

[[[ 7]
  [10]]

 [[23]
  [34]]] 

rt (batch) (2, 2, 1) 

[[[3]
  [4]]

 [[7]
  [8]]] 

ct * M * r (batch) (2, 1, 1) 

[[[ 61]]

 [[433]]] 

ct * M * r (2, 1) 

[[ 61]
 [433]] 



In [14]:
session.close()
del graph

## 3. Dual LSTM Encoder

In [15]:
# Fresh graph for the dual encoder walkthrough. The bare `graph.as_default()`
# call did nothing because its context manager was never entered, so it is
# removed; `tf.InteractiveSession(graph=graph)` installs the graph and the
# session as defaults for the `.eval()` / `.run()` calls that follow.
graph = tf.Graph()
session = tf.InteractiveSession(graph=graph)
session

<tensorflow.python.client.session.InteractiveSession at 0x7f3aa8ded780>

In [16]:
vocab_size = 25
sentence_size = 4
batch_size = 2
embed_size = 5
hidden_size = 8

**Input Sentence (Dataset)**

In [17]:
input_context = tf.random_uniform(
    shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)

print(input_context)
input_context.eval()

Tensor("random_uniform:0", shape=(2, 4), dtype=int64)


array([[ 6,  7,  3,  9],
       [16,  3,  3, 21]])

In [18]:
input_utterance = tf.random_uniform(
    shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)

print(input_utterance)
input_utterance.eval()

Tensor("random_uniform_1:0", shape=(2, 4), dtype=int64)


array([[12, 12, 17, 24],
       [ 5,  1,  6,  3]])

In [19]:
input_context_len = tf.constant(sentence_size, shape=(batch_size, 1))

print(input_context_len)
input_context_len.eval()

Tensor("Const:0", shape=(2, 1), dtype=int32)


array([[4],
       [4]], dtype=int32)

In [20]:
input_utterance_len = tf.constant(sentence_size, shape=(batch_size, 1))

print(input_utterance_len)
input_utterance_len.eval()

Tensor("Const_1:0", shape=(2, 1), dtype=int32)


array([[4],
       [4]], dtype=int32)

**Dual Encoder - Input**

Encode Context and Utterance together.

The tensors are concatenated along the batch axis so that both sentences are encoded in a single pass.

In [21]:
input_data = tf.concat([input_context, input_utterance], axis=0)

print(input_data)
input_data.eval()

Tensor("concat:0", shape=(4, 4), dtype=int64)


array([[ 3, 15, 22, 24],
       [15, 19, 10, 24],
       [18, 13, 22,  2],
       [21, 12, 16, 23]])

In [22]:
input_length = tf.concat([input_context_len, input_utterance_len], axis=0)

print(input_length)
input_length.eval()

Tensor("concat_1:0", shape=(4, 1), dtype=int32)


array([[4],
       [4],
       [4],
       [4]], dtype=int32)

In [23]:
# Flatten (4, 1) -> (4,): one length per row of input_data. This vector is
# what gets passed as `sequence_length` to dynamic_rnn further down.
input_length = tf.reshape(input_length, [-1])

print(input_length)
input_length.eval()

Tensor("Reshape:0", shape=(4,), dtype=int32)


array([4, 4, 4, 4], dtype=int32)

**Word Embedding**

In [24]:
embeddings = tf.Variable(
    tf.random_uniform(shape=(vocab_size, embed_size), minval=-0.25, maxval=0.25))

embeddings.initializer.run()

print(embeddings)
embeddings.eval()

<tf.Variable 'Variable:0' shape=(25, 5) dtype=float32_ref>


array([[-0.21907032,  0.12420064,  0.08173907, -0.02944922,  0.13916892],
       [-0.19457632, -0.04192621, -0.01358092, -0.09134567, -0.03374678],
       [-0.20141655, -0.04927802,  0.04475701,  0.08313692, -0.24986994],
       [-0.11313343, -0.15176058,  0.19104403, -0.22445405, -0.09094852],
       [ 0.18365574,  0.04884416,  0.2263785 , -0.20114535,  0.06503594],
       [-0.06325829,  0.13168997, -0.04494703, -0.24026519, -0.08608866],
       [-0.22691852,  0.05871272, -0.05562907,  0.14495915,  0.19582754],
       [-0.11647069, -0.02338237,  0.23145068, -0.13811696,  0.12614876],
       [ 0.16999269, -0.19063103,  0.1020385 , -0.00697273,  0.20171386],
       [-0.16343969, -0.04493779,  0.08613151, -0.19786072,  0.18145597],
       [-0.21040785, -0.13465285,  0.16652071, -0.01110697, -0.23971975],
       [ 0.00184298, -0.03021336,  0.05623734, -0.10353458,  0.2476756 ],
       [-0.00172263,  0.09363931,  0.10709786, -0.08221245, -0.19734287],
       [-0.13532072,  0.18851179,  0.2

In [25]:
input_embed = tf.nn.embedding_lookup(embeddings, input_data)

print(input_embed)
input_embed.eval() 

Tensor("embedding_lookup:0", shape=(4, 4, 5), dtype=float32)


array([[[ 0.09113204, -0.07571423,  0.24813646,  0.13842708,  0.1614812 ],
        [-0.19457632, -0.04192621, -0.01358092, -0.09134567, -0.03374678],
        [ 0.11710513,  0.02811396, -0.13201642, -0.10535878,  0.09543604],
        [-0.17450178, -0.11135095,  0.18272793,  0.15989608,  0.24597967]],

       [[-0.11313343, -0.15176058,  0.19104403, -0.22445405, -0.09094852],
        [-0.21907032,  0.12420064,  0.08173907, -0.02944922,  0.13916892],
        [-0.16841835, -0.13842201,  0.02659667,  0.07141083, -0.19023174],
        [-0.16841835, -0.13842201,  0.02659667,  0.07141083, -0.19023174]],

       [[ 0.18335986,  0.06729227, -0.00719678,  0.22223705,  0.01118076],
        [-0.13532072,  0.18851179,  0.23771602,  0.03604376, -0.01679903],
        [-0.22691852,  0.05871272, -0.05562907,  0.14495915,  0.19582754],
        [ 0.16999269, -0.19063103,  0.1020385 , -0.00697273,  0.20171386]],

       [[ 0.23115325, -0.07712948, -0.22003978,  0.17074454,  0.03851539],
        [-0.2014165

**LSTM Encoder**

https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn

https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/rnn/LSTMCell

In [26]:
# A single LSTM cell encodes both contexts and utterances, since input_embed
# holds the two stacked along the batch axis.
cell = tf.nn.rnn_cell.LSTMCell(
    hidden_size,
    forget_bias=2.0,
    use_peepholes=True,
    state_is_tuple=True)

# `outputs` holds the cell output at every time step; `states` is the final
# LSTMStateTuple(c, h) for each row of the batch.
outputs, states = tf.nn.dynamic_rnn(
    cell,
    input_embed,
    sequence_length=input_length,
    dtype=tf.float32)

# No global initializer is run in this session, so the cell's variables are
# initialized one by one.
for tv in cell.trainable_variables:
    tv.initializer.run()

print('Outputs:\n')
print(outputs)
print()
print('Final states:\n')
print(states)

Outputs:

Tensor("rnn/transpose:0", shape=(4, 4, 8), dtype=float32)

Final states:

LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(?, 8) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 8) dtype=float32>)


**Dual Encoder - Output**

Split the encoded vectors by sentence type (context vs. utterance).

In [27]:
# The batch was stacked as [contexts; utterances] along axis 0, so splitting
# the final hidden state into two equal parts along axis 0 recovers the
# context encodings and the utterance encodings, in that order.
context_encoding, utterance_encoding = tf.split(states.h, num_or_size_splits=2, axis=0)

In [28]:
print(context_encoding)
context_encoding.eval()

Tensor("split:0", shape=(?, 8), dtype=float32)


array([[-0.00200458, -0.07782954, -0.03901846, -0.02058421, -0.01113973,
         0.00311457,  0.01272435, -0.00159189],
       [ 0.00298107, -0.02075206, -0.01452476, -0.0094074 , -0.03382337,
        -0.00774932, -0.02527196, -0.01318577]], dtype=float32)

In [29]:
print(utterance_encoding)
utterance_encoding.eval()

Tensor("split:1", shape=(?, 8), dtype=float32)


array([[ 0.03146975, -0.0214084 ,  0.0434907 , -0.0105907 ,  0.04393053,
         0.01149318,  0.07102276,  0.03361205],
       [ 0.02853673, -0.06037546,  0.0323844 , -0.02367128, -0.01450527,
        -0.04186971,  0.02878205, -0.0065584 ]], dtype=float32)

**Prediction**

In [30]:
ct = context_encoding
rt = utterance_encoding

M = tf.Variable(tf.truncated_normal(shape=(hidden_size, hidden_size)))

M.initializer.run()

print(M)
M.eval()

<tf.Variable 'Variable_1:0' shape=(8, 8) dtype=float32_ref>


array([[ 0.12962437,  1.14114833,  0.06564874,  0.56438684, -1.34626341,
         1.28605855,  0.11107973, -0.79747385],
       [-0.11315158, -0.35980058,  1.18024707,  1.81900978,  0.05341239,
         0.92986083, -0.2714211 ,  0.81756943],
       [ 0.60764712, -0.70737588,  0.77942061,  0.29839471,  0.54188615,
        -1.31166804, -0.41452876, -1.36700535],
       [ 0.76668429, -1.86763895, -0.29187876,  0.04876193, -1.27575839,
         0.96734595,  0.81053513,  0.03170199],
       [-0.00268976, -0.68755138, -0.1671298 ,  0.99898678, -0.49784848,
         1.28975391, -0.90316176,  0.50242645],
       [-0.59916943, -0.38789433,  0.85632062,  0.69197869,  0.25876817,
         0.60806382,  0.61659104,  1.40782416],
       [ 0.40941748, -0.31378675,  0.69913375, -1.82325435, -0.30749947,
        -0.30945924, -0.10638782,  0.1487257 ],
       [ 1.26011765, -0.1710791 , -0.56199098, -0.06139187,  0.37000132,
         0.74245983,  1.51508677, -1.33844411]], dtype=float32)

In [31]:
ct_M = tf.matmul(ct, M)

print(ct_M)
ct_M.eval()

Tensor("MatMul:0", shape=(?, 8), dtype=float32)


array([[ 0.11149243, -0.07628223,  0.02294171, -0.03739275, -0.11500248,
         0.10149613,  0.02004033, -0.10178125],
       [ 0.02060858, -0.01722639,  0.11686569,  0.08371522,  0.06707289,
         0.01298102, -0.05079825,  0.00637592]], dtype=float32)

In [32]:
batch_ct_M = tf.expand_dims(ct_M, axis=2)

print(batch_ct_M)
batch_ct_M.eval()

Tensor("ExpandDims:0", shape=(?, 8, 1), dtype=float32)


array([[[ 0.05122604],
        [-0.02991961],
        [-0.03042503],
        [-0.09613165],
        [-0.08290789],
        [ 0.02937662],
        [ 0.00145915],
        [-0.05831744]],

       [[ 0.03272467],
        [ 0.03863359],
        [ 0.08826862],
        [-0.10304278],
        [ 0.12715741],
        [-0.04919792],
        [-0.03106936],
        [-0.01913336]]], dtype=float32)

In [33]:
batch_rt = tf.expand_dims(rt, axis=2)

print(batch_rt)
batch_rt.eval()

Tensor("ExpandDims_1:0", shape=(?, 8, 1), dtype=float32)


array([[[-0.02745816],
        [-0.0448821 ],
        [-0.00924154],
        [-0.07239423],
        [-0.02991658],
        [ 0.00671526],
        [ 0.01641566],
        [ 0.00155555]],

       [[-0.00696998],
        [ 0.02177757],
        [-0.00371406],
        [-0.01129351],
        [ 0.00814003],
        [ 0.05789442],
        [ 0.00783155],
        [ 0.02890128]]], dtype=float32)

In [34]:
batch_ct_M_r = tf.matmul(batch_ct_M, batch_rt, transpose_a=True)
ct_M_r = tf.squeeze(batch_ct_M_r, axis=2)

print(ct_M_r)
ct_M_r.eval()

Tensor("Squeeze:0", shape=(?, 1), dtype=float32)


array([[ 0.00300801],
       [ 0.00315467]], dtype=float32)

In [35]:
b = tf.Variable(0, dtype=tf.float32)

b.initializer.run()

print(b)
b.eval()

<tf.Variable 'Variable_2:0' shape=() dtype=float32_ref>


0.0

In [36]:
logits = ct_M_r + b
probs = tf.sigmoid(logits)

print(probs)
probs.eval()

Tensor("Sigmoid:0", shape=(?, 1), dtype=float32)


array([[ 0.50056845],
       [ 0.50094402]], dtype=float32)

**Loss**

https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/losses/sigmoid_cross_entropy

In [37]:
# Targets: one label per (context, utterance) pair.
# 1 -> the utterance is the correct response to the context
# 0 -> the utterance is a random sentence taken from a different context
targets = tf.constant([1, 0], shape=(2, 1))

print(targets)
targets.eval()

Tensor("Const_3:0", shape=(2, 1), dtype=int32)


array([[1],
       [0]], dtype=int32)

In [38]:
loss = tf.losses.sigmoid_cross_entropy(
    multi_class_labels=targets, logits=logits, reduction=tf.losses.Reduction.MEAN)

print(loss)
loss.eval()

INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
Tensor("sigmoid_cross_entropy_loss/value:0", shape=(), dtype=float32)


0.69300103

In [39]:
session.close()
del graph

In [40]:
def dual_encoder(vocab_size,
                 embed_size,
                 hidden_size,
                 input_context,
                 input_context_len,
                 input_utterance,
                 input_utterance_len,
                 targets):
    """Build the dual LSTM encoder graph and score (context, utterance) pairs.

    Contexts and utterances are stacked along the batch axis, embedded, and
    run through one shared LSTM. The final hidden states are split back into
    context and utterance encodings, and each pair is scored as
    ``sigmoid(c^T M r + b)``.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_size: Size of each word embedding.
        hidden_size: Number of units in the LSTM cell; `M` is
            (hidden_size, hidden_size).
        input_context: Word-id tensor for the contexts; in this file it is
            an int64 tensor of shape (batch, sentence).
        input_context_len: Context lengths; any shape that flattens to one
            entry per context (shape (batch, 1) in this file).
        input_utterance: Word-id tensor for the utterances, stacked under
            `input_context` along axis 0.
        input_utterance_len: Utterance lengths, same layout as
            `input_context_len`.
        targets: Labels for each pair (1 = matching utterance, 0 = not), or
            None to skip building the loss.

    Returns:
        A tuple ``(probs, loss)``. `probs` is the per-pair sigmoid score with
        a trailing axis of size 1. `loss` is the mean sigmoid cross-entropy
        against `targets`, or None when `targets` is None.
    """
    # Stack contexts on top of utterances so one RNN pass encodes both.
    tokens = tf.concat([input_context, input_utterance], axis=0)
    lengths = tf.reshape(
        tf.concat([input_context_len, input_utterance_len], axis=0), [-1])

    embeddings = tf.get_variable(
        'embeddings',
        shape=(vocab_size, embed_size),
        initializer=tf.random_uniform_initializer(-0.25, 0.25))
    embedded = tf.nn.embedding_lookup(embeddings, tokens, name='input_embed')

    with tf.variable_scope('rnn'):
        lstm = tf.nn.rnn_cell.LSTMCell(
            hidden_size,
            forget_bias=2.0,
            use_peepholes=True,
            state_is_tuple=True)

        # Only the final state is needed; the per-step outputs are ignored.
        _, final_state = tf.nn.dynamic_rnn(
            lstm,
            embedded,
            sequence_length=lengths,
            dtype=tf.float32)

        # First half of the rows are contexts, second half utterances.
        context_vec, utterance_vec = tf.split(
            final_state.h, num_or_size_splits=2, axis=0)

    with tf.variable_scope('prediction'):
        M = tf.get_variable(
            'M',
            shape=(hidden_size, hidden_size),
            initializer=tf.truncated_normal_initializer())

        # Per-pair c^T M r via a batched matmul: give every row a trailing
        # axis, multiply (1, hidden) x (hidden, 1) per example, then drop the
        # extra axis again.
        projected = tf.expand_dims(tf.matmul(context_vec, M), axis=2)
        response = tf.expand_dims(utterance_vec, axis=2)
        pair_score = tf.squeeze(
            tf.matmul(projected, response, transpose_a=True), axis=2)

        b = tf.get_variable(
            'b', shape=(), initializer=tf.zeros_initializer())

        logits = pair_score + b
        probs = tf.sigmoid(logits)

    # The loss is only built when labels are supplied.
    loss = None
    if targets is not None:
        loss = tf.losses.sigmoid_cross_entropy(
            multi_class_labels=targets,
            logits=logits,
            reduction=tf.losses.Reduction.MEAN)

    return probs, loss


# Smoke run: build the dual encoder on random data with larger sizes and
# evaluate the loss once.
graph = tf.Graph()
with graph.as_default(), tf.Session(graph=graph) as session:
    vocab_size = 100000
    embed_size = 100
    hidden_size = 200

    batch_size = 128
    sentence_size = 160

    # Random word ids and full-length sequences for contexts and utterances.
    input_context = tf.random_uniform(
        shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)
    input_context_len = tf.constant(sentence_size, shape=(batch_size, 1))
    input_utterance = tf.random_uniform(
        shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)
    input_utterance_len = tf.constant(sentence_size, shape=(batch_size, 1))

    # Integer `tf.random_uniform` samples from [minval, maxval), so maxval=2
    # is required to draw both labels; with maxval=1 every target would be 0.
    targets = tf.random_uniform(
        shape=(batch_size, 1), minval=0, maxval=2, dtype=tf.int64)

    _, loss = dual_encoder(vocab_size,
                           embed_size,
                           hidden_size,
                           input_context,
                           input_context_len,
                           input_utterance,
                           input_utterance_len,
                           targets)

    init = tf.global_variables_initializer()
    session.run(init)

    loss_value = session.run(loss)

    print('Average loss: {:,.3f}'.format(loss_value))

del graph

INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
Average loss: 4.939
