**Dual LSTM Encoder for Dialog Response Generation**

http://www.wildml.com/2016/07/deep-learning-for-chatbots-2-retrieval-based-model-tensorflow/

https://github.com/dennybritz/chatbot-retrieval

https://github.com/rkadlec/ubuntu-ranking-dataset-creator

https://arxiv.org/abs/1506.08909

In [1]:
import tensorflow as tf
tf.VERSION

'1.2.0'

## 1. Word Embedding

In [2]:
graph = tf.Graph()
# InteractiveSession(graph=graph) makes both the session and `graph` the
# defaults for the following cells. A bare `graph.as_default()` call only
# builds a context manager that is never entered, so it is not needed here.
session = tf.InteractiveSession(graph=graph)
session

<tensorflow.python.client.session.InteractiveSession at 0x7feef2eec6a0>

In [3]:
vocab_size = 4
embed_size = 2

word_0 = [0, 0]
word_1 = [1, 0]
word_2 = [0, 1]
word_3 = [1, 1]

embeddings = tf.stack([word_0, word_1, word_2, word_3])

print('Embeddings:\n')
print(embeddings)
embeddings.eval()

Embeddings:

Tensor("stack:0", shape=(4, 2), dtype=int32)


array([[0, 0],
       [1, 0],
       [0, 1],
       [1, 1]], dtype=int32)

In [4]:
batch_size = 2
sentence_length = 3

sentence_0 = [0, 3, 2]
sentence_1 = [3, 1, 0]

input_data = tf.stack([sentence_0, sentence_1])

print('Sentences:\n')
print(input_data)
input_data.eval()

Sentences:

Tensor("stack_1:0", shape=(2, 3), dtype=int32)


array([[0, 3, 2],
       [3, 1, 0]], dtype=int32)

In [5]:
input_embed = tf.nn.embedding_lookup(embeddings, input_data)

print('Input:\n')
print(input_embed)
input_embed.eval()

Input:

Tensor("embedding_lookup:0", shape=(2, 3, 2), dtype=int32)


array([[[0, 0],
        [1, 1],
        [0, 1]],

       [[1, 1],
        [1, 0],
        [0, 0]]], dtype=int32)

In [6]:
session.close()
del graph

## 2. Pairing

Similarity between **`c`** and **`c'`**, where **`c' = Mr`**.

**`c`** -> encoded context vector

**`r`** -> encoded response vector

**`M`** -> matrix that translates a response vector into the context space

In [7]:
graph = tf.Graph()
# InteractiveSession(graph=graph) makes both the session and `graph` the
# defaults for the following cells. A bare `graph.as_default()` call only
# builds a context manager that is never entered, so it is not needed here.
session = tf.InteractiveSession(graph=graph)
session

<tensorflow.python.client.session.InteractiveSession at 0x7feef2eec860>

In [8]:
M = tf.constant([[1, 2], [3, 4]])

print('M', M.shape, '\n')
print(M.eval(), '\n')

M (2, 2) 

[[1 2]
 [3 4]] 



In [9]:
c = tf.constant([[1], [2]])
r = tf.constant([[3], [4]])

ct_M = tf.matmul(c, M, transpose_a=True)
ct_M_r = tf.matmul(ct_M, r)

print('c', c.shape, '\n')
print(c.eval(), '\n')
print('r', r.shape, '\n')
print(r.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

c (2, 1) 

[[1]
 [2]] 

r (2, 1) 

[[3]
 [4]] 

ct * M (1, 2) 

[[ 7 10]] 

ct * M * r (1, 1) 

[[61]] 



In [10]:
ct = tf.constant([[1, 2], [0, 0]])
rt = tf.constant([[3, 4], [0, 0]])

ct_M = tf.matmul(ct, M)
ct_M_r = tf.matmul(ct_M, rt, transpose_b=True)

print('ct', ct.shape, '\n')
print(ct.eval(), '\n')
print('rt', rt.shape, '\n')
print(rt.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

ct (2, 2) 

[[1 2]
 [0 0]] 

rt (2, 2) 

[[3 4]
 [0 0]] 

ct * M (2, 2) 

[[ 7 10]
 [ 0  0]] 

ct * M * r (2, 2) 

[[61  0]
 [ 0  0]] 



In [11]:
c = tf.constant([[5], [6]])
r = tf.constant([[7], [8]])

ct_M = tf.matmul(c, M, transpose_a=True)
ct_M_r = tf.matmul(ct_M, r)

print('c', c.shape, '\n')
print(c.eval(), '\n')
print('r', r.shape, '\n')
print(r.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

c (2, 1) 

[[5]
 [6]] 

r (2, 1) 

[[7]
 [8]] 

ct * M (1, 2) 

[[23 34]] 

ct * M * r (1, 1) 

[[433]] 



In [12]:
ct = tf.constant([[1, 2], [5, 6]])
rt = tf.constant([[3, 4], [7, 8]])

ct_M = tf.matmul(ct, M)
ct_M_r = tf.matmul(ct_M, rt, transpose_b=True)

print('ct', ct.shape, '\n')
print(ct.eval(), '\n')
print('rt', rt.shape, '\n')
print(rt.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

ct (2, 2) 

[[1 2]
 [5 6]] 

rt (2, 2) 

[[3 4]
 [7 8]] 

ct * M (2, 2) 

[[ 7 10]
 [23 34]] 

ct * M * r (2, 2) 

[[ 61 129]
 [205 433]] 



In [13]:
# Row-wise pairing: score only the matching (context i, response i) pairs.
# The result is the diagonal of the full cross product from the previous cell.
ct = tf.constant([[1, 2], [5, 6]])
rt = tf.constant([[3, 4], [7, 8]])

ct_M = tf.matmul(ct, M)

# Add a trailing axis so every row becomes its own column vector:
# (2, 2) -> (2, 2, 1).
batch_ct_M = tf.expand_dims(ct_M, axis=2)
batch_rt = tf.expand_dims(rt, axis=2)

# Batched matmul; transpose_a turns each (2, 1) column of ct * M into a
# (1, 2) row, so every example yields a (1, 1) product.
batch_ct_M_r = tf.matmul(batch_ct_M, batch_rt, transpose_a=True)

# Drop the trailing axis again: (2, 1, 1) -> (2, 1).
ct_M_r = tf.squeeze(batch_ct_M_r, axis=2)

print('ct', ct.shape, '\n')
print(ct.eval(), '\n')
print('rt', rt.shape, '\n')
print(rt.eval(), '\n')
print('ct * M', ct_M.shape, '\n')
print(ct_M.eval(), '\n')
print('ct * M (batch)', batch_ct_M.shape, '\n')
print(batch_ct_M.eval(), '\n')
print('rt (batch)', batch_rt.shape, '\n')
print(batch_rt.eval(), '\n')
print('ct * M * r (batch)', batch_ct_M_r.shape, '\n')
print(batch_ct_M_r.eval(), '\n')
print('ct * M * r', ct_M_r.shape, '\n')
print(ct_M_r.eval(), '\n')

ct (2, 2) 

[[1 2]
 [5 6]] 

rt (2, 2) 

[[3 4]
 [7 8]] 

ct * M (2, 2) 

[[ 7 10]
 [23 34]] 

ct * M (batch) (2, 2, 1) 

[[[ 7]
  [10]]

 [[23]
  [34]]] 

rt (batch) (2, 2, 1) 

[[[3]
  [4]]

 [[7]
  [8]]] 

ct * M * r (batch) (2, 1, 1) 

[[[ 61]]

 [[433]]] 

ct * M * r (2, 1) 

[[ 61]
 [433]] 



In [14]:
session.close()
del graph

## 3. Dual LSTM Encoder

In [15]:
graph = tf.Graph()
# InteractiveSession(graph=graph) makes both the session and `graph` the
# defaults for the following cells. A bare `graph.as_default()` call only
# builds a context manager that is never entered, so it is not needed here.
session = tf.InteractiveSession(graph=graph)
session

<tensorflow.python.client.session.InteractiveSession at 0x7feef0eaf5f8>

In [16]:
vocab_size = 25
sentence_size = 4
batch_size = 2
embed_size = 5
hidden_size = 8

**Input Sentence -> Word Embedding**

In [17]:
input_context = tf.random_uniform(
    shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)

print(input_context)
input_context.eval()

Tensor("random_uniform:0", shape=(2, 4), dtype=int64)


array([[20,  3, 18,  0],
       [19, 10, 16, 11]])

In [18]:
input_utterance = tf.random_uniform(
    shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)

print(input_utterance)
input_utterance.eval()

Tensor("random_uniform_1:0", shape=(2, 4), dtype=int64)


array([[ 8, 16,  6, 22],
       [16, 18, 11, 14]])

In [19]:
input_context_len = tf.constant(sentence_size, shape=(batch_size, 1))

print(input_context_len)
input_context_len.eval()

Tensor("Const:0", shape=(2, 1), dtype=int32)


array([[4],
       [4]], dtype=int32)

In [20]:
input_utterance_len = tf.constant(sentence_size, shape=(batch_size, 1))

print(input_utterance_len)
input_utterance_len.eval()

Tensor("Const_1:0", shape=(2, 1), dtype=int32)


array([[4],
       [4]], dtype=int32)

In [21]:
embeddings = tf.Variable(
    tf.random_uniform(shape=(vocab_size, embed_size), minval=-0.25, maxval=0.25))

embeddings.initializer.run()

print(embeddings)
embeddings.eval()

<tf.Variable 'Variable:0' shape=(25, 5) dtype=float32_ref>


array([[ 0.14988273, -0.19110286, -0.24203259,  0.09176153,  0.13214087],
       [ 0.1510129 ,  0.0303269 , -0.15055364, -0.14482069,  0.02167702],
       [-0.1299116 ,  0.16214228, -0.03491116,  0.09344488, -0.09528035],
       [-0.24947137,  0.19961739, -0.01646852, -0.0918501 ,  0.1530059 ],
       [ 0.11675918, -0.01111042, -0.0985992 ,  0.09767246, -0.16816711],
       [ 0.03683805, -0.18322444, -0.13840151,  0.08290368,  0.13598228],
       [-0.04843336, -0.0033769 ,  0.0212689 ,  0.01030666,  0.02789927],
       [ 0.08915007,  0.07976782,  0.24043638, -0.17069489, -0.02816147],
       [-0.1653192 ,  0.03305852,  0.11798108, -0.20664763,  0.13696319],
       [-0.06338978,  0.04527843,  0.01564932, -0.02352995,  0.07507151],
       [ 0.03615546,  0.19331169,  0.14691347, -0.02281886,  0.1041016 ],
       [ 0.1577577 , -0.02061677,  0.1709919 , -0.22921723, -0.04294664],
       [ 0.20407331, -0.22857004, -0.24630672,  0.24416858, -0.20645511],
       [ 0.12103355,  0.07726097, -0.0

In [22]:
context_embed = tf.nn.embedding_lookup(embeddings, input_context)

print(context_embed)
context_embed.eval() 

Tensor("embedding_lookup:0", shape=(2, 4, 5), dtype=float32)


array([[[ 0.03615546,  0.19331169,  0.14691347, -0.02281886,  0.1041016 ],
        [ 0.20407331, -0.22857004, -0.24630672,  0.24416858, -0.20645511],
        [ 0.22622663, -0.19167888,  0.17659527,  0.1906907 , -0.08598322],
        [-0.24947137,  0.19961739, -0.01646852, -0.0918501 ,  0.1530059 ]],

       [[-0.1299116 ,  0.16214228, -0.03491116,  0.09344488, -0.09528035],
        [-0.04843336, -0.0033769 ,  0.0212689 ,  0.01030666,  0.02789927],
        [ 0.22622663, -0.19167888,  0.17659527,  0.1906907 , -0.08598322],
        [ 0.1510129 ,  0.0303269 , -0.15055364, -0.14482069,  0.02167702]]], dtype=float32)

In [23]:
utterance_embed = tf.nn.embedding_lookup(embeddings, input_utterance)

print(utterance_embed)
utterance_embed.eval()

Tensor("embedding_lookup_1:0", shape=(2, 4, 5), dtype=float32)


array([[[ 0.05597407,  0.11223638,  0.20009196,  0.20111275, -0.10194093],
        [-0.24947137,  0.19961739, -0.01646852, -0.0918501 ,  0.1530059 ],
        [ 0.13453859, -0.11819875, -0.17189431,  0.14056057,  0.08515871],
        [-0.11714709,  0.23732382, -0.24154603, -0.04631478,  0.11294776]],

       [[ 0.03683805, -0.18322444, -0.13840151,  0.08290368,  0.13598228],
        [ 0.1577577 , -0.02061677,  0.1709919 , -0.22921723, -0.04294664],
        [ 0.03683805, -0.18322444, -0.13840151,  0.08290368,  0.13598228],
        [-0.1299116 ,  0.16214228, -0.03491116,  0.09344488, -0.09528035]]], dtype=float32)

**Dual Encode - Input**

Concatenated tensor to encode both sentences in a single pass.

In [24]:
input_embed = tf.concat([context_embed, utterance_embed], axis=0)

print(input_embed)
input_embed.eval()

Tensor("concat:0", shape=(4, 4, 5), dtype=float32)


array([[[ 0.1577577 , -0.02061677,  0.1709919 , -0.22921723, -0.04294664],
        [-0.13184053,  0.14212346,  0.22124308, -0.11775833,  0.03535438],
        [ 0.03615546,  0.19331169,  0.14691347, -0.02281886,  0.1041016 ],
        [ 0.1577577 , -0.02061677,  0.1709919 , -0.22921723, -0.04294664]],

       [[ 0.11675918, -0.01111042, -0.0985992 ,  0.09767246, -0.16816711],
        [ 0.08915007,  0.07976782,  0.24043638, -0.17069489, -0.02816147],
        [-0.06338978,  0.04527843,  0.01564932, -0.02352995,  0.07507151],
        [-0.06486589, -0.24939632,  0.2331118 ,  0.09323716, -0.01013249]],

       [[ 0.03683805, -0.18322444, -0.13840151,  0.08290368,  0.13598228],
        [ 0.1510129 ,  0.0303269 , -0.15055364, -0.14482069,  0.02167702],
        [ 0.08915007,  0.07976782,  0.24043638, -0.17069489, -0.02816147],
        [ 0.08915007,  0.07976782,  0.24043638, -0.17069489, -0.02816147]],

       [[ 0.22622663, -0.19167888,  0.17659527,  0.1906907 , -0.08598322],
        [-0.2411596

In [25]:
input_length = tf.concat([input_context_len, input_utterance_len], axis=0)

print(input_length)
input_length.eval()

Tensor("concat_1:0", shape=(4, 1), dtype=int32)


array([[4],
       [4],
       [4],
       [4]], dtype=int32)

In [26]:
input_length = tf.reshape(input_length, [-1])

print(input_length)
input_length.eval()

Tensor("Reshape:0", shape=(4,), dtype=int32)


array([4, 4, 4, 4], dtype=int32)

**LSTM Encoder**

In [27]:
# A single LSTM cell encodes every row of input_embed (contexts and
# utterances alike), so both sentence types share the same weights.
cell = tf.nn.rnn_cell.LSTMCell(
    hidden_size,
    forget_bias=2.0,
    use_peepholes=True,
    state_is_tuple=True)

# sequence_length holds one length per row of input_embed.
# `states` is an LSTMStateTuple (c, h), as the printed output shows.
outputs, states = tf.nn.dynamic_rnn(
    cell,
    input_embed,
    sequence_length=input_length,
    dtype=tf.float32)

# Initialize the cell's trainable variables in the interactive session;
# this runs after dynamic_rnn has built the recurrent graph.
for tv in cell.trainable_variables:
    tv.initializer.run()

print('Outputs:\n')
print(outputs)
print()
print('Final states:\n')
print(states)

Outputs:

Tensor("rnn/transpose:0", shape=(4, 4, 8), dtype=float32)

Final states:

LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(?, 8) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 8) dtype=float32>)


**Dual Encode - Output**

Split the encodings back into one tensor per sentence type (context and utterance).

In [28]:
# input_embed was built as concat([context_embed, utterance_embed], axis=0),
# so the first half of the rows of the final hidden state belongs to the
# contexts and the second half to the utterances.
context_encoding, utterance_encoding = tf.split(states.h, num_or_size_splits=2, axis=0)

In [29]:
print(context_encoding)
context_encoding.eval()

Tensor("split:0", shape=(?, 8), dtype=float32)


array([[-0.00699558,  0.00977578, -0.00646461, -0.00915181,  0.01707251,
        -0.0238877 , -0.00059203,  0.01717633],
       [ 0.00099451, -0.0106104 , -0.04555769, -0.01584092,  0.01838387,
        -0.04374483, -0.03094321,  0.04608263]], dtype=float32)

In [30]:
print(utterance_encoding)
utterance_encoding.eval()

Tensor("split:1", shape=(?, 8), dtype=float32)


array([[-0.03658517, -0.01717115,  0.04685726, -0.0335674 ,  0.00718115,
         0.01776352,  0.01542488, -0.01498981],
       [ 0.06261449, -0.03825479, -0.06994751,  0.04793863, -0.00912744,
        -0.02626521, -0.03092923,  0.02570395]], dtype=float32)

**Prediction**

In [31]:
ct = context_encoding
rt = utterance_encoding

M = tf.Variable(tf.truncated_normal(shape=(hidden_size, hidden_size)))

M.initializer.run()

print(M)
M.eval()

<tf.Variable 'Variable_1:0' shape=(8, 8) dtype=float32_ref>


array([[-0.02592108, -0.49067274, -0.76623291,  1.49376059, -0.4238739 ,
         0.92777282, -0.18552829,  0.39114252],
       [ 0.35380542, -0.02336758,  0.47686008,  0.17341617,  0.34636134,
        -0.48681974, -0.15234731,  0.87920207],
       [-0.7267921 , -1.09397745, -0.19578362,  0.66617787, -0.32199371,
         0.27647865,  0.08492222,  1.80460811],
       [-1.47765732,  0.58444881,  0.75915891, -0.41247815, -0.95768559,
         0.19206792,  0.17678015, -0.38182244],
       [ 0.09469015, -0.7331118 , -0.28203532,  0.37355739,  0.00920274,
         0.23877402,  0.27701908, -0.02028478],
       [-0.28349239,  1.89762306, -0.89023685,  0.36111453, -0.04939051,
         1.42928362, -0.04162903, -0.28117257],
       [-0.35769558,  1.21531439, -1.20074153, -1.35509169,  0.09909067,
         1.06533611,  0.71215773,  1.27959204],
       [ 0.40157151, -0.54389036,  1.07706428, -0.5330258 ,  0.06305721,
         0.62319988,  0.99265051,  1.48126411]], dtype=float32)

In [32]:
ct_M = tf.matmul(ct, M)

print(ct_M)
ct_M.eval()

Tensor("MatMul:0", shape=(?, 8), dtype=float32)


array([[  2.04271219e-05,   2.22710837e-02,  -3.00640408e-02,
         -2.74273399e-02,  -3.46280821e-03,   4.22730409e-02,
          2.17236895e-02,   2.30774693e-02],
       [ -6.47506118e-02,   1.23922862e-01,  -7.01749846e-02,
          4.47266251e-02,  -1.98715832e-02,   2.73962002e-02,
         -5.91390543e-02,  -4.17987294e-02]], dtype=float32)

In [33]:
batch_ct_M = tf.expand_dims(ct_M, axis=2)

print(batch_ct_M)
batch_ct_M.eval()

Tensor("ExpandDims:0", shape=(?, 8, 1), dtype=float32)


array([[[-0.02123714],
        [ 0.10588894],
        [-0.10926309],
        [-0.02437902],
        [ 0.04113741],
        [ 0.01197255],
        [-0.03193182],
        [ 0.06311099]],

       [[ 0.02261633],
        [-0.02912687],
        [-0.02649755],
        [-0.05920239],
        [ 0.03552807],
        [ 0.00198112],
        [ 0.03563669],
        [ 0.11155382]]], dtype=float32)

In [34]:
batch_rt = tf.expand_dims(rt, axis=2)

print(batch_rt)
batch_rt.eval()

Tensor("ExpandDims_1:0", shape=(?, 8, 1), dtype=float32)


array([[[-0.05162073],
        [ 0.0358412 ],
        [ 0.04149287],
        [-0.03796405],
        [ 0.02101916],
        [-0.00133579],
        [ 0.00930932],
        [-0.00747717]],

       [[-0.00384405],
        [ 0.00421883],
        [ 0.00807627],
        [-0.00077201],
        [-0.03191332],
        [ 0.02167791],
        [ 0.03467385],
        [-0.03085664]]], dtype=float32)

In [35]:
batch_ct_M_r = tf.matmul(batch_ct_M, batch_rt, transpose_a=True)
ct_M_r = tf.squeeze(batch_ct_M_r, axis=2)

print(ct_M_r)
ct_M_r.eval()

Tensor("Squeeze:0", shape=(?, 1), dtype=float32)


array([[ 0.00065791],
       [-0.00417699]], dtype=float32)

In [36]:
b = tf.Variable(0, dtype=tf.float32)

b.initializer.run()

print(b)
b.eval()

<tf.Variable 'Variable_2:0' shape=() dtype=float32_ref>


0.0

In [37]:
logits = ct_M_r + b
probs = tf.sigmoid(logits)

print(probs)
probs.eval()

Tensor("Sigmoid:0", shape=(?, 1), dtype=float32)


array([[ 0.49933356],
       [ 0.50007993]], dtype=float32)

**Loss**

In [38]:
# Targets, one label per (context, utterance) pair:
# 1 -> the utterance is the true response to the context
# 0 -> the utterance is a random response taken from another context
targets = tf.constant([1, 0], shape=(2, 1))

print(targets)
targets.eval()

Tensor("Const_3:0", shape=(2, 1), dtype=int32)


array([[1],
       [0]], dtype=int32)

In [39]:
loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=tf.to_float(targets), logits=logits)

print(loss)
loss.eval()

INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
Tensor("sigmoid_cross_entropy_loss/value:0", shape=(), dtype=float32)


0.69348526

In [40]:
# `loss` already has shape () (see the tensor printed by the previous cell),
# so taking the mean leaves its value unchanged.
# NOTE(review): the number printed below differs from the previous cell
# presumably because input_context / input_utterance are tf.random_uniform
# ops that are re-sampled on every eval() - confirm.
mean_loss = tf.reduce_mean(loss)

print(mean_loss)
mean_loss.eval()

Tensor("Mean:0", shape=(), dtype=float32)


0.69289988

In [41]:
session.close()
del graph

In [42]:
def dual_encoder(vocab_size,
                 embed_size,
                 hidden_size,
                 input_context,
                 input_context_len,
                 input_utterance,
                 input_utterance_len,
                 targets):
    """Build the dual LSTM encoder graph and return ``(probs, loss)``.

    Contexts and utterances are embedded with one shared embedding matrix,
    stacked along axis 0 and encoded in a single ``dynamic_rnn`` pass. The
    final hidden states are split back into context (``c``) and utterance
    (``r``) encodings, which are scored row by row as
    ``sigmoid(c^T M r + b)``.

    Args:
        vocab_size: Number of rows of the embedding matrix.
        embed_size: Number of columns of the embedding matrix.
        hidden_size: Number of LSTM units; ``M`` is
            ``(hidden_size, hidden_size)``.
        input_context: Word ids of the contexts, used as indices into the
            embedding matrix (assumed ``(batch, sentence)``, as in the demo
            call - confirm for other callers).
        input_context_len: Context lengths; concatenated with
            ``input_utterance_len`` and flattened to 1-D before being passed
            as ``sequence_length``.
        input_utterance: Word ids of the utterances; expected to have the
            same shape as ``input_context`` because both are concatenated
            along axis 0 and the encoder state is later split into two equal
            halves.
        input_utterance_len: Utterance lengths, see ``input_context_len``.
        targets: Labels for each (context, utterance) pair, cast to float32
            for the loss; pass ``None`` to build the prediction graph only.

    Returns:
        Tuple ``(probs, loss)``: ``probs`` is ``sigmoid(logits)``; ``loss``
        is the scalar sigmoid cross-entropy tensor named ``loss``, or
        ``None`` when ``targets`` is ``None``.
    """
    with tf.variable_scope('embedding'):
        embeddings = tf.get_variable(
            'embeddings',
            shape=(vocab_size, embed_size),
            initializer=tf.random_uniform_initializer(-0.25, 0.25))

        context_embed = tf.nn.embedding_lookup(
            embeddings, input_context, name='context_embed')
        utterance_embed = tf.nn.embedding_lookup(
            embeddings, input_utterance, name='utterance_embed')

        # Stack both sentence types along axis 0 so one RNN pass encodes them.
        input_embed = tf.concat([context_embed, utterance_embed], axis=0)
        input_length = tf.concat([input_context_len, input_utterance_len], axis=0)
        input_length = tf.reshape(input_length, [-1])

    with tf.variable_scope('rnn'):
        cell = tf.nn.rnn_cell.LSTMCell(
            hidden_size,
            forget_bias=2.0,
            use_peepholes=True,
            state_is_tuple=True)

        # Only the final state is needed; the per-step outputs are discarded.
        _, states = tf.nn.dynamic_rnn(
            cell,
            input_embed,
            sequence_length=input_length,
            dtype=tf.float32)

        # First half of the rows: contexts; second half: utterances.
        context_encoding, utterance_encoding = tf.split(
            states.h, num_or_size_splits=2, axis=0)

    with tf.variable_scope('prediction'):
        ct = context_encoding
        rt = utterance_encoding
        M = tf.get_variable(
            'M',
            shape=(hidden_size, hidden_size),
            initializer=tf.truncated_normal_initializer())

        # Row-wise c^T M r: give every row a trailing axis, multiply the
        # pairs with a batched matmul, then drop the trailing axis again.
        ct_M = tf.matmul(ct, M)
        batch_ct_M = tf.expand_dims(ct_M, axis=2)
        batch_rt = tf.expand_dims(rt, axis=2)
        batch_ct_M_r = tf.matmul(batch_ct_M, batch_rt, transpose_a=True)
        ct_M_r = tf.squeeze(batch_ct_M_r, axis=2)

        b = tf.get_variable(
            'b', shape=(), initializer=tf.zeros_initializer())

        logits = ct_M_r + b

        probs = tf.sigmoid(logits)

    if targets is None:
        return probs, None

    # tf.losses.sigmoid_cross_entropy already reduces to a scalar, so no
    # further reduction is needed; tf.identity only attaches the name "loss".
    loss = tf.losses.sigmoid_cross_entropy(
        multi_class_labels=tf.cast(targets, tf.float32), logits=logits)
    loss = tf.identity(loss, name="loss")

    return probs, loss


# Smoke test: build the model on random data at realistic sizes and evaluate
# the loss once.
graph = tf.Graph()
with graph.as_default(), tf.Session(graph=graph) as session:
    vocab_size = 91619
    embed_size = 100
    hidden_size = 256

    batch_size = 128
    sentence_size = 220
    input_context = tf.random_uniform(
        shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)
    input_context_len = tf.constant(sentence_size, shape=(batch_size, 1))
    input_utterance = tf.random_uniform(
        shape=(batch_size, sentence_size), minval=0, maxval=vocab_size, dtype=tf.int64)
    input_utterance_len = tf.constant(sentence_size, shape=(batch_size, 1))
    # Integer random_uniform samples from [minval, maxval), so maxval must be
    # 2 to draw both labels 0 and 1 (maxval=1 would yield only zeros).
    targets = tf.random_uniform(
        shape=(batch_size, 1), minval=0, maxval=2, dtype=tf.int64)

    _, loss = dual_encoder(vocab_size,
                           embed_size,
                           hidden_size,
                           input_context,
                           input_context_len,
                           input_utterance,
                           input_utterance_len,
                           targets)

    init = tf.global_variables_initializer()
    session.run(init)

    loss_value = session.run(loss)

    print('Average loss: {:,.3f}'.format(loss_value))

del graph

INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
Average loss: 3.698
