**Dual LSTM Encoder for Dialog Response Generation**

http://www.wildml.com/2016/07/deep-learning-for-chatbots-2-retrieval-based-model-tensorflow/

https://github.com/dennybritz/chatbot-retrieval

https://github.com/rkadlec/ubuntu-ranking-dataset-creator

https://arxiv.org/abs/1506.08909

In [1]:
import tensorflow as tf
tf.VERSION

'1.2.0'

## 1. Word Embedding

In [2]:
# Fresh graph for this section. A bare `graph.as_default()` call only creates
# a context manager and has no effect unless entered with `with`, so it is
# omitted; passing `graph=` to InteractiveSession is what makes this graph
# (and the session) the defaults used by `.eval()` in the cells below.
graph = tf.Graph()
session = tf.InteractiveSession(graph=graph)
session

<tensorflow.python.client.session.InteractiveSession at 0x7f1d239ec588>

In [3]:
# Toy vocabulary: 4 words, each with a hand-written 2-dimensional embedding.
vocab_size = 4
embed_size = 2

word_0 = [0, 0]
word_1 = [1, 0]
word_2 = [0, 1]
word_3 = [1, 1]

# Row i of the stacked (4, 2) tensor is the embedding of word id i.
embeddings = tf.stack([word_0, word_1, word_2, word_3])

print('Embeddings:\n')
print(embeddings)
embeddings.eval()

Embeddings:

Tensor("stack:0", shape=(4, 2), dtype=int32)


array([[0, 0],
       [1, 0],
       [0, 1],
       [1, 1]], dtype=int32)

In [4]:
# A batch of two sentences, each a list of three word ids.
batch_size = 2
sentence_length = 3

sentence_0 = [0, 3, 2]
sentence_1 = [3, 1, 0]

# Stacked into a (2, 3) tensor: one row per sentence.
input_data = tf.stack([sentence_0, sentence_1])

print('Sentences:\n')
print(input_data)
input_data.eval()

Sentences:

Tensor("stack_1:0", shape=(2, 3), dtype=int32)


array([[0, 3, 2],
       [3, 1, 0]], dtype=int32)

In [5]:
# Replace every word id with its embedding row: (2, 3) ids -> (2, 3, 2) vectors.
input_embed = tf.nn.embedding_lookup(embeddings, input_data)

print('Input:\n')
print(input_embed)
input_embed.eval()

Input:

Tensor("embedding_lookup:0", shape=(2, 3, 2), dtype=int32)


array([[[0, 0],
        [1, 1],
        [0, 1]],

       [[1, 1],
        [1, 0],
        [0, 0]]], dtype=int32)

In [6]:
# Close this section's interactive session and drop the graph reference so
# the next section starts from a fresh graph.
session.close()
del graph

## 2. Pairing

Similarity between **`c`** and **`c'`**, where **`c' = Mr`**.

**`c`** -> encoded context vector

**`r`** -> encoded response vector

**`M`** -> matrix that translates the response vector into the context space

In [7]:
# Fresh graph for this section. A bare `graph.as_default()` call only creates
# a context manager and has no effect unless entered with `with`, so it is
# omitted; passing `graph=` to InteractiveSession is what makes this graph
# (and the session) the defaults used by `.eval()` in the cells below.
graph = tf.Graph()
session = tf.InteractiveSession(graph=graph)
session

<tensorflow.python.client.session.InteractiveSession at 0x7f1d239dffd0>

In [8]:
# Fixed 2x2 matrix M used in the bilinear score c^T * M * r in the cells below.
M = tf.constant([[1, 2], [3, 4]])

print('M', M.shape, '\n')
print(M.eval(), '\n')

M (2, 2) 

[[1 2]
 [3 4]] 



In [9]:
# First (context, response) pair, written as (2, 1) column vectors.
c = tf.constant([[1], [2]])
r = tf.constant([[3], [4]])

# transpose_a turns c into a row, so c^T * M is (1, 2) and the score is (1, 1).
ct_M = tf.matmul(c, M, transpose_a=True)
ct_M_r = tf.matmul(ct_M, r)

print('c', c.shape, '\n')
print(c.eval(), '\n')
print('r', r.shape, '\n')
print(r.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

c (2, 1) 

[[1]
 [2]] 

r (2, 1) 

[[3]
 [4]] 

ct * M (1, 2) 

[[ 7 10]] 

ct * M * r (1, 1) 

[[61]] 



In [10]:
# The same pair stored as row vectors, padded with a zero row so the tensors
# look like a batch of two.
ct = tf.constant([[1, 2], [0, 0]])
rt = tf.constant([[3, 4], [0, 0]])

# ct * M * rt^T multiplies every context row with every response row; the
# score of pair i is the diagonal entry (i, i).
ct_M = tf.matmul(ct, M)
ct_M_r = tf.matmul(ct_M, rt, transpose_b=True)

print('ct', ct.shape, '\n')
print(ct.eval(), '\n')
print('rt', rt.shape, '\n')
print(rt.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

ct (2, 2) 

[[1 2]
 [0 0]] 

rt (2, 2) 

[[3 4]
 [0 0]] 

ct * M (2, 2) 

[[ 7 10]
 [ 0  0]] 

ct * M * r (2, 2) 

[[61  0]
 [ 0  0]] 



In [11]:
# Second (context, response) pair, again as (2, 1) column vectors.
c = tf.constant([[5], [6]])
r = tf.constant([[7], [8]])

# Same computation as for the first pair: c^T * M is (1, 2), the score (1, 1).
ct_M = tf.matmul(c, M, transpose_a=True)
ct_M_r = tf.matmul(ct_M, r)

print('c', c.shape, '\n')
print(c.eval(), '\n')
print('r', r.shape, '\n')
print(r.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

c (2, 1) 

[[5]
 [6]] 

r (2, 1) 

[[7]
 [8]] 

ct * M (1, 2) 

[[23 34]] 

ct * M * r (1, 1) 

[[433]] 



In [12]:
# Both pairs stacked as rows: row i of ct pairs with row i of rt.
ct = tf.constant([[1, 2], [5, 6]])
rt = tf.constant([[3, 4], [7, 8]])

# The full (2, 2) product also contains the cross-pair terms; only the
# diagonal holds the per-pair scores computed individually above.
ct_M = tf.matmul(ct, M)
ct_M_r = tf.matmul(ct_M, rt, transpose_b=True)

print('ct', ct.shape, '\n')
print(ct.eval(), '\n')
print('rt', rt.shape, '\n')
print(rt.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

ct (2, 2) 

[[1 2]
 [5 6]] 

rt (2, 2) 

[[3 4]
 [7 8]] 

ct * M (2, 2) 

[[ 7 10]
 [23 34]] 

ct * M * r (2, 2) 

[[ 61 129]
 [205 433]] 



In [13]:
# Batched version: compute only the per-pair scores, without the cross terms.
ct = tf.constant([[1, 2], [5, 6]])
rt = tf.constant([[3, 4], [7, 8]])

ct_M = tf.matmul(ct, M)

# Add a trailing axis so each row becomes its own (2, 1) column matrix.
batch_ct_M = tf.expand_dims(ct_M, axis=2)
batch_rt = tf.expand_dims(rt, axis=2)

# Per batch element: (1, 2) x (2, 1) -> (1, 1), giving a (2, 1, 1) result.
batch_ct_M_r = tf.matmul(batch_ct_M, batch_rt, transpose_a=True)

# Drop the last axis to get one score per pair, shape (2, 1).
ct_M_r = tf.squeeze(batch_ct_M_r, axis=2)

print('ct', ct.shape, '\n')
print(ct.eval(), '\n')
print('rt', rt.shape, '\n')
print(rt.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M (batch)', batch_ct_M.shape, '\n')
print(batch_ct_M.eval(), '\n')
print('rt (batch)', batch_rt.shape, '\n')
print(batch_rt.eval(), '\n')
print('ct * M * r (batch)', batch_ct_M_r.shape, '\n')
print(batch_ct_M_r.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

ct (2, 2) 

[[1 2]
 [5 6]] 

rt (2, 2) 

[[3 4]
 [7 8]] 

ct * M (2, 2) 

[[ 7 10]
 [23 34]] 

ct * M (batch) (2, 2, 1) 

[[[ 7]
  [10]]

 [[23]
  [34]]] 

rt (batch) (2, 2, 1) 

[[[3]
  [4]]

 [[7]
  [8]]] 

ct * M * r (batch) (2, 1, 1) 

[[[ 61]]

 [[433]]] 

ct * M * r (2, 1) 

[[ 61]
 [433]] 



In [14]:
# Close this section's interactive session and drop the graph reference so
# the next section starts from a fresh graph.
session.close()
del graph

## 3. Dual LSTM Encoder

In [15]:
# Fresh graph for this section. A bare `graph.as_default()` call only creates
# a context manager and has no effect unless entered with `with`, so it is
# omitted; passing `graph=` to InteractiveSession is what makes this graph
# (and the session) the defaults used by `.eval()` in the cells below.
graph = tf.Graph()
session = tf.InteractiveSession(graph=graph)
session

<tensorflow.python.client.session.InteractiveSession at 0x7f1d239ec940>

In [16]:
# Small sizes so every intermediate tensor can be printed in full.
vocab_size = 25     # number of distinct word ids
sentence_size = 4   # word ids per sentence
batch_size = 2      # (context, utterance) pairs per batch
embed_size = 5      # width of each word embedding
hidden_size = 8     # number of LSTM units

**Input Sentence (Dataset)**

In [17]:
# Stand-in for real data: random word ids for the contexts, one row per
# sentence. The tensor is re-sampled on every evaluation, so the values
# printed by different cells do not match each other.
input_context = tf.random_uniform(
    shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)

print(input_context)
input_context.eval()

Tensor("random_uniform:0", shape=(2, 4), dtype=int64)


array([[22,  5, 23, 10],
       [ 9, 20,  2, 19]])

In [18]:
# Random word ids for the utterances, with the same shape as the contexts.
input_utterance = tf.random_uniform(
    shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)

print(input_utterance)
input_utterance.eval()

Tensor("random_uniform_1:0", shape=(2, 4), dtype=int64)


array([[11,  6,  5,  5],
       [22, 20, 10, 15]])

In [19]:
# Every context is given the full length `sentence_size`; shape (batch_size, 1).
input_context_len = tf.constant(sentence_size, shape=(batch_size, 1))

print(input_context_len)
input_context_len.eval()

Tensor("Const:0", shape=(2, 1), dtype=int32)


array([[4],
       [4]], dtype=int32)

In [20]:
# Every utterance is given the full length `sentence_size`; shape (batch_size, 1).
input_utterance_len = tf.constant(sentence_size, shape=(batch_size, 1))

print(input_utterance_len)
input_utterance_len.eval()

Tensor("Const_1:0", shape=(2, 1), dtype=int32)


array([[4],
       [4]], dtype=int32)

**Dual Encoder - Input**

Encode Context and Utterance together.

The tensors are concatenated along the batch axis so that both sentences are encoded in a single pass.

In [21]:
# Stack contexts on top of utterances along the batch axis: the first
# `batch_size` rows are contexts, the remaining rows are utterances.
# The printed ids differ from the cells above because the random inputs are
# re-sampled on every eval().
input_data = tf.concat([input_context, input_utterance], axis=0)

print(input_data)
input_data.eval()

Tensor("concat:0", shape=(4, 4), dtype=int64)


array([[22, 22, 20, 16],
       [ 8, 12, 10, 11],
       [ 4,  5, 17,  4],
       [17,  6, 22, 24]])

In [22]:
# Lengths are stacked in the same order as the sentences (contexts first).
input_length = tf.concat([input_context_len, input_utterance_len], axis=0)

print(input_length)
input_length.eval()

Tensor("concat_1:0", shape=(4, 1), dtype=int32)


array([[4],
       [4],
       [4],
       [4]], dtype=int32)

In [23]:
# Flatten (4, 1) -> (4,): one length per row, as passed to `sequence_length`
# of dynamic_rnn further down.
input_length = tf.reshape(input_length, [-1])

print(input_length)
input_length.eval()

Tensor("Reshape:0", shape=(4,), dtype=int32)


array([4, 4, 4, 4], dtype=int32)

**Word Embedding**

In [24]:
# Trainable embedding table with one row per word id, initialised uniformly
# in [-0.25, 0.25).
embeddings = tf.Variable(
    tf.random_uniform(shape=(vocab_size, embed_size), minval=-0.25, maxval=0.25))

# Variables must be initialised explicitly before they can be evaluated.
embeddings.initializer.run()

print(embeddings)
embeddings.eval()

<tf.Variable 'Variable:0' shape=(25, 5) dtype=float32_ref>


array([[ -2.04107165e-01,   2.37267435e-01,   1.14207685e-01,
         -7.48070478e-02,   1.78534091e-01],
       [  2.62767673e-02,   1.81471288e-01,   2.12492347e-02,
         -1.81813300e-01,   2.45958567e-03],
       [  7.27196336e-02,  -1.20882750e-01,  -4.03785706e-03,
         -2.19977736e-01,   2.95515060e-02],
       [  5.37881851e-02,   1.51563585e-01,   1.71785474e-01,
          1.83263302e-01,   4.02801037e-02],
       [ -1.40134633e-01,  -7.83115625e-03,   1.60895646e-01,
          1.11507297e-01,  -1.14968061e-01],
       [  2.09304869e-01,   2.15322495e-01,  -1.68165445e-01,
          2.51206756e-02,  -2.48422027e-01],
       [  8.58283043e-02,  -1.11105800e-01,  -9.47612524e-03,
         -1.63683116e-01,  -1.11841798e-01],
       [  2.13393331e-01,   1.17012084e-01,   2.13164806e-01,
          1.92613840e-01,  -1.34313226e-01],
       [ -2.18812704e-01,  -1.04888201e-01,  -1.05619729e-01,
          2.40417659e-01,   2.00243890e-01],
       [  1.14995301e-01,  -3.9517879

In [25]:
# Map every word id to its embedding row: (4, 4) ids -> (4, 4, 5) vectors.
input_embed = tf.nn.embedding_lookup(embeddings, input_data)

print(input_embed)
input_embed.eval()

Tensor("embedding_lookup:0", shape=(4, 4, 5), dtype=float32)


array([[[-0.19789857,  0.05603832,  0.20012206, -0.10646617,  0.19833338],
        [ 0.07271963, -0.12088275, -0.00403786, -0.21997774,  0.02955151],
        [ 0.21339333,  0.11701208,  0.21316481,  0.19261384, -0.13431323],
        [ 0.05320454, -0.15145749,  0.08474243,  0.10350358, -0.09061474]],

       [[-0.19568312,  0.05757344,  0.05571216, -0.00041908, -0.05896401],
        [ 0.01994389,  0.16112757,  0.22844422, -0.06527919, -0.075764  ],
        [-0.21328759, -0.15018392, -0.14115661,  0.13768727,  0.13808584],
        [ 0.02627677,  0.18147129,  0.02124923, -0.1818133 ,  0.00245959]],

       [[ 0.20930487,  0.21532249, -0.16816545,  0.02512068, -0.24842203],
        [ 0.02627677,  0.18147129,  0.02124923, -0.1818133 ,  0.00245959],
        [ 0.18034887,  0.08843511,  0.04614931,  0.06737208,  0.24065715],
        [ 0.1867047 ,  0.00907141,  0.12092727,  0.05982256,  0.18101031]],

       [[ 0.18034887,  0.08843511,  0.04614931,  0.06737208,  0.24065715],
        [ 0.0537881

**LSTM Encoder**

https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn

https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/contrib/rnn/LSTMCell

In [26]:
# One LSTM cell shared by contexts and utterances (they sit in the same batch).
cell = tf.nn.rnn_cell.LSTMCell(
    hidden_size,
    forget_bias=2.0,
    use_peepholes=True,
    state_is_tuple=True)

# `outputs` holds the per-step outputs, shape (4, 4, 8); `states` is the final
# LSTMStateTuple(c, h) with one row per input sentence.
outputs, states = tf.nn.dynamic_rnn(
    cell,
    input_embed,
    sequence_length=input_length,
    dtype=tf.float32)

# The cell's variables are created by dynamic_rnn above; initialise them one
# by one so the tensors can be evaluated.
for tv in cell.trainable_variables:
    tv.initializer.run()

print('Outputs:\n')
print(outputs)
print()
print('Final states:\n')
print(states)

Outputs:

Tensor("rnn/transpose:0", shape=(4, 4, 8), dtype=float32)

Final states:

LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(?, 8) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 8) dtype=float32>)


**Dual Encoder - Output**

Split the encoded vectors by sentence type (context vs. utterance).

In [27]:
# Undo the batch-axis concat: the first half of the final hidden states are
# the contexts, the second half the utterances.
context_encoding, utterance_encoding = tf.split(states.h, num_or_size_splits=2, axis=0)

In [28]:
# Encoded contexts: one `hidden_size`-wide vector per context.
print(context_encoding)
context_encoding.eval()

Tensor("split:0", shape=(?, 8), dtype=float32)


array([[ 0.00434534,  0.0113768 ,  0.03847972, -0.03734675, -0.033556  ,
        -0.04215824,  0.01554458, -0.02245954],
       [ 0.00745609, -0.04339714,  0.06076946, -0.01559918,  0.00880726,
        -0.06102269, -0.03033336, -0.02441947]], dtype=float32)

In [29]:
# Encoded utterances: one `hidden_size`-wide vector per utterance.
print(utterance_encoding)
utterance_encoding.eval()

Tensor("split:1", shape=(?, 8), dtype=float32)


array([[-0.02006005,  0.03290617, -0.06813933, -0.00201645, -0.01616752,
         0.09591205,  0.02442359,  0.04216364],
       [-0.02327644,  0.0127591 , -0.03932941,  0.03534726,  0.00401921,
         0.08309978,  0.01232029,  0.05077659]], dtype=float32)

**Prediction**

In [30]:
# Row-vector names matching the "Pairing" section: ct = contexts, rt = responses.
ct = context_encoding
rt = utterance_encoding

# Trainable (hidden_size, hidden_size) matrix M of the bilinear score.
M = tf.Variable(tf.truncated_normal(shape=(hidden_size, hidden_size)))

M.initializer.run()

print(M)
M.eval()

<tf.Variable 'Variable_1:0' shape=(8, 8) dtype=float32_ref>


array([[-0.36489382, -1.9051007 , -0.63643503,  0.20628606,  0.22457613,
         1.23054576, -0.21263087,  0.62139791],
       [ 0.842426  ,  0.35761181, -0.32304102, -0.61951959, -1.03329933,
         0.52980793,  1.39493489, -0.12824728],
       [ 0.01979361, -1.00124192,  1.26791668,  1.47156739, -0.56096286,
        -1.37084091,  1.01594818,  1.03041232],
       [-0.63078672,  0.72437549,  0.40278333,  0.06710047,  0.18424897,
         1.89565718, -1.64592218,  0.66166532],
       [ 0.39488083, -0.77108759,  0.35598478, -0.2579954 , -0.52177596,
        -1.46128106, -0.55175263, -1.69498014],
       [ 1.38248515,  0.4515157 ,  0.30242011, -0.21638362,  0.71063805,
        -1.26243246,  1.30880558,  0.45916498],
       [-0.81056315, -0.19090298,  1.02799225,  0.06439355, -0.12565689,
        -0.28468347, -0.8998273 , -0.81133282],
       [ 0.44970828,  0.81073517,  1.17651534, -0.4153094 ,  1.50258088,
        -0.14034843, -1.84965444,  0.27871731]], dtype=float32)

In [31]:
# Project every context encoding through M; still one row per context.
ct_M = tf.matmul(ct, M)

print(ct_M)
ct_M.eval()

Tensor("MatMul:0", shape=(?, 8), dtype=float32)


array([[ -4.60841227e-03,   4.57319012e-03,   2.89663672e-02,
          1.18324189e-02,   3.23643833e-02,  -5.56884392e-04,
         -1.03770860e-01,  -1.39733500e-04],
       [  1.18097939e-01,   2.78882056e-01,   4.33437414e-02,
         -1.35466948e-01,   1.65135354e-01,  -1.15727605e-02,
         -2.04559118e-01,  -4.57612015e-02]], dtype=float32)

In [32]:
# Add a trailing axis so each projected context is an (8, 1) column matrix.
batch_ct_M = tf.expand_dims(ct_M, axis=2)

print(batch_ct_M)
batch_ct_M.eval()

Tensor("ExpandDims:0", shape=(?, 8, 1), dtype=float32)


array([[[ 0.08164874],
        [ 0.10342894],
        [ 0.0842026 ],
        [-0.01081535],
        [ 0.11796083],
        [-0.09975211],
        [-0.10813951],
        [ 0.01602971]],

       [[-0.13119766],
        [-0.22096619],
        [ 0.00090272],
        [ 0.15289454],
        [-0.1193141 ],
        [-0.00155331],
        [ 0.0384606 ],
        [ 0.03740075]]], dtype=float32)

In [33]:
# Same trailing axis for the responses: each becomes an (8, 1) column matrix.
batch_rt = tf.expand_dims(rt, axis=2)

print(batch_rt)
batch_rt.eval()

Tensor("ExpandDims_1:0", shape=(?, 8, 1), dtype=float32)


array([[[-0.01237914],
        [-0.03904677],
        [-0.00861677],
        [ 0.01759103],
        [ 0.03048603],
        [ 0.00806168],
        [-0.02958024],
        [ 0.0015794 ]],

       [[-0.02060232],
        [-0.01270923],
        [ 0.0181511 ],
        [-0.00363906],
        [ 0.00152899],
        [-0.00344818],
        [-0.02591585],
        [ 0.01295206]]], dtype=float32)

In [34]:
# Batched (1, 8) x (8, 1) product per pair, then drop the last axis to get
# one raw score per (context, utterance) pair.
batch_ct_M_r = tf.matmul(batch_ct_M, batch_rt, transpose_a=True)
ct_M_r = tf.squeeze(batch_ct_M_r, axis=2)

print(ct_M_r)
ct_M_r.eval()

Tensor("Squeeze:0", shape=(?, 1), dtype=float32)


array([[ 0.00335007],
       [ 0.01933217]], dtype=float32)

In [35]:
# Scalar trainable bias added to every score, starting at zero.
b = tf.Variable(0, dtype=tf.float32)

b.initializer.run()

print(b)
b.eval()

<tf.Variable 'Variable_2:0' shape=() dtype=float32_ref>


0.0

In [36]:
# The sigmoid squashes each raw score into a probability for its pair.
logits = ct_M_r + b
probs = tf.sigmoid(logits)

print(probs)
probs.eval()

Tensor("Sigmoid:0", shape=(?, 1), dtype=float32)


array([[ 0.49999952],
       [ 0.49983948]], dtype=float32)

**Loss**

https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/losses/sigmoid_cross_entropy

In [37]:
# Targets: one label per (context, utterance) pair.
# 1 -> the utterance is the correct response to the context
# 0 -> the utterance is a random sentence taken from another context
targets = tf.constant([1, 0], shape=(2, 1))

print(targets)
targets.eval()

Tensor("Const_3:0", shape=(2, 1), dtype=int32)


array([[1],
       [0]], dtype=int32)

In [38]:
# Sigmoid cross-entropy between labels and raw logits (not `probs`),
# averaged into a single scalar.
loss = tf.losses.sigmoid_cross_entropy(
    multi_class_labels=targets, logits=logits, reduction=tf.losses.Reduction.MEAN)

print(loss)
loss.eval()

INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
Tensor("sigmoid_cross_entropy_loss/value:0", shape=(), dtype=float32)


0.69175792

In [39]:
# Close the interactive session and drop the graph reference before the
# self-contained example below builds its own graph.
session.close()
del graph

In [40]:
def dual_encoder(vocab_size,
                 embed_size,
                 hidden_size,
                 input_context,
                 input_context_len,
                 input_utterance,
                 input_utterance_len,
                 targets):
    """Build the dual LSTM encoder graph and score context/utterance pairs.

    Contexts and utterances are concatenated along the batch axis, embedded
    and encoded by a single LSTM. The final hidden states are split back into
    two halves and each pair is scored as sigmoid(c^T * M * r + b).

    Args:
        vocab_size: number of rows of the embedding table.
        embed_size: width of each embedding vector.
        hidden_size: number of LSTM units; M is (hidden_size, hidden_size).
        input_context: word-id tensor holding the contexts.
        input_context_len: tensor with the length of each context.
        input_utterance: word-id tensor holding the utterances.
        input_utterance_len: tensor with the length of each utterance.
        targets: label tensor for the loss, or None to skip building the loss.

    Returns:
        A `(probs, loss)` tuple; `loss` is None when `targets` is None.
    """
    # Contexts first, utterances second: the split below relies on this order.
    stacked_ids = tf.concat([input_context, input_utterance], axis=0)
    stacked_len = tf.reshape(
        tf.concat([input_context_len, input_utterance_len], axis=0), [-1])

    embedding_table = tf.get_variable(
        'embeddings',
        shape=(vocab_size, embed_size),
        initializer=tf.random_uniform_initializer(-0.25, 0.25))
    embedded = tf.nn.embedding_lookup(
        embedding_table, stacked_ids, name='input_embed')

    with tf.variable_scope('rnn'):
        lstm = tf.nn.rnn_cell.LSTMCell(
            hidden_size,
            forget_bias=2.0,
            use_peepholes=True,
            state_is_tuple=True)

        # Only the final state is needed; the per-step outputs are discarded.
        _, final_state = tf.nn.dynamic_rnn(
            lstm,
            embedded,
            sequence_length=stacked_len,
            dtype=tf.float32)

        context_vec, utterance_vec = tf.split(
            final_state.h, num_or_size_splits=2, axis=0)

    with tf.variable_scope('prediction'):
        bilinear = tf.get_variable(
            'M',
            shape=(hidden_size, hidden_size),
            initializer=tf.truncated_normal_initializer())

        # A batched (1, hidden) x (hidden, 1) product yields one score per
        # pair instead of the full cross-pair matrix.
        projected = tf.expand_dims(tf.matmul(context_vec, bilinear), axis=2)
        response = tf.expand_dims(utterance_vec, axis=2)
        score = tf.squeeze(
            tf.matmul(projected, response, transpose_a=True), axis=2)

        bias = tf.get_variable(
            'b', shape=(), initializer=tf.zeros_initializer())

        logits = score + bias
        probs = tf.sigmoid(logits)

    loss = None
    if targets is not None:
        loss = tf.losses.sigmoid_cross_entropy(
            multi_class_labels=targets,
            logits=logits,
            reduction=tf.losses.Reduction.MEAN)

    return probs, loss


# Smoke test: build the dual encoder at realistic sizes on random inputs and
# evaluate the loss once on the freshly initialised model.
graph = tf.Graph()
with graph.as_default(), tf.Session(graph=graph) as session:
    vocab_size = 91619
    embed_size = 100
    hidden_size = 256

    batch_size = 128
    sentence_size = 220
    input_context = tf.random_uniform(
        shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)
    input_context_len = tf.constant(sentence_size, shape=(batch_size, 1))
    input_utterance = tf.random_uniform(
        shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)
    input_utterance_len = tf.constant(sentence_size, shape=(batch_size, 1))
    # For integer dtypes random_uniform samples from [minval, maxval), so
    # maxval must be 2 to draw labels from {0, 1}; maxval=1 would make every
    # target 0.
    targets = tf.random_uniform(
        shape=(batch_size, 1), minval=0, maxval=2, dtype=tf.int64)

    _, loss = dual_encoder(vocab_size,
                           embed_size,
                           hidden_size,
                           input_context,
                           input_context_len,
                           input_utterance,
                           input_utterance_len,
                           targets)

    init = tf.global_variables_initializer()
    session.run(init)

    loss_value = session.run(loss)

    print('Average loss: {:,.3f}'.format(loss_value))

del graph

INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
Average loss: 3.503
