**Dual LSTM Encoder for Dialog Response Selection (Retrieval-Based)**

http://www.wildml.com/2016/07/deep-learning-for-chatbots-2-retrieval-based-model-tensorflow/

https://github.com/dennybritz/chatbot-retrieval

https://github.com/rkadlec/ubuntu-ranking-dataset-creator

https://arxiv.org/abs/1506.08909

In [1]:
import tensorflow as tf
tf.VERSION

'1.2.0'

**Estimator**

https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/estimator/Estimator

https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/estimator

**model_fn**
```text
 |        model_fn: Model function. Follows the signature:
 |      
 |          * Args:
 |      
 |            * `features`: This is the first item returned from the `input_fn`
 |                   passed to `train`, `evaluate`, and `predict`. This should be a
 |                   single `Tensor` or `dict` of same.
 |            * `labels`: This is the second item returned from the `input_fn`
 |                   passed to `train`, `evaluate`, and `predict`. This should be a
 |                   single `Tensor` or `dict` of same (for multi-head models). If
 |                   mode is `ModeKeys.PREDICT`, `labels=None` will be passed. If
 |                   the `model_fn`'s signature does not accept `mode`, the
 |                   `model_fn` must still be able to handle `labels=None`.
 |            * `mode`: Optional. Specifies if this is training, evaluation or
 |                   prediction. See `ModeKeys`.
 |            * `params`: Optional `dict` of hyperparameters.  Will receive what
 |                   is passed to Estimator in `params` parameter. This allows
 |                   to configure Estimators from hyper parameter tuning.
 |            * `config`: Optional configuration object. Will receive what is passed
 |                   to Estimator in `config` parameter, or the default `config`.
 |                   Allows updating things in your model_fn based on configuration
 |                   such as `num_ps_replicas`, or `model_dir`.
 |      
 |          * Returns:
 |            `EstimatorSpec`
 ```

In [2]:
def dual_encoder(vocab_size,
                 embed_size,
                 hidden_size,
                 input_context,
                 input_context_len,
                 input_utterance,
                 input_utterance_len,
                 targets):
    """Build the dual-encoder graph that scores (context, utterance) pairs.

    Contexts and utterances share one embedding matrix and one LSTM: the two
    embedded batches are stacked along axis 0, pushed through a single
    ``dynamic_rnn`` call and the final hidden state is split back in two.
    The pair score is ``sigmoid(c^T M r + b)``, with ``c`` and ``r`` the final
    hidden states of the context and of the utterance.

    Args:
        vocab_size: Number of rows of the embedding matrix.
        embed_size: Number of columns of the embedding matrix.
        hidden_size: Number of LSTM units; ``M`` is ``hidden_size`` square.
        input_context: Token ids of the contexts
            (presumably shaped (batch, time) -- confirm against the input_fn).
        input_context_len: Sequence lengths of the contexts; flattened to 1-D
            together with ``input_utterance_len`` before use.
        input_utterance: Token ids of the candidate utterances.
        input_utterance_len: Sequence lengths of the candidate utterances.
        targets: Labels of the pairs, or ``None`` to skip building the loss.

    Returns:
        A ``(probs, loss)`` tuple. ``loss`` is ``None`` when ``targets`` is
        ``None``.
    """
    with tf.variable_scope('embedding'):
        embedding_matrix = tf.get_variable(
            'embeddings',
            shape=(vocab_size, embed_size),
            initializer=tf.random_uniform_initializer(-0.25, 0.25))

        embedded_context = tf.nn.embedding_lookup(
            embedding_matrix, input_context, name='context_embed')
        embedded_utterance = tf.nn.embedding_lookup(
            embedding_matrix, input_utterance, name='utterance_embed')

        # Stack contexts on top of utterances so that one RNN pass (and one
        # set of weights) encodes both halves.
        stacked_embed = tf.concat(
            [embedded_context, embedded_utterance], axis=0)
        stacked_len = tf.reshape(
            tf.concat([input_context_len, input_utterance_len], axis=0),
            [-1])

    with tf.variable_scope('rnn'):
        lstm = tf.nn.rnn_cell.LSTMCell(
            hidden_size,
            forget_bias=2.0,
            use_peepholes=True,
            state_is_tuple=True)

        _, final_state = tf.nn.dynamic_rnn(
            lstm,
            stacked_embed,
            sequence_length=stacked_len,
            dtype=tf.float32)

        # First half of the stacked batch is the contexts, second half the
        # utterances (same order as the concat above).
        context_encoding, utterance_encoding = tf.split(
            final_state.h, num_or_size_splits=2, axis=0)

    with tf.variable_scope('prediction'):
        M = tf.get_variable(
            'M',
            shape=(hidden_size, hidden_size),
            initializer=tf.truncated_normal_initializer())

        # Per-example bilinear form c^T M r, computed as a batched
        # (1 x hidden) @ (hidden x 1) matrix product.
        projected_context = tf.matmul(context_encoding, M)
        projected_column = tf.expand_dims(projected_context, axis=2)
        utterance_column = tf.expand_dims(utterance_encoding, axis=2)
        pair_score = tf.squeeze(
            tf.matmul(projected_column, utterance_column, transpose_a=True),
            axis=2)

        b = tf.get_variable(
            'b', shape=(), initializer=tf.zeros_initializer())

        logits = pair_score + b

        probs = tf.sigmoid(logits)

    loss = None
    if targets is not None:
        loss = tf.reduce_mean(
            tf.losses.sigmoid_cross_entropy(
                multi_class_labels=tf.to_float(targets), logits=logits),
            name="loss")

    return probs, loss

In [3]:
def model_fn(features, labels, mode, params):
    """Estimator ``model_fn`` wrapping the dual encoder.

    Args:
        features: Dict with the keys ``'context'``, ``'context_len'``,
            ``'utterance'`` and ``'utterance_len'``.
        labels: Pair labels, forwarded to ``dual_encoder`` as ``targets``.
        mode: A ``tf.estimator.ModeKeys`` value; a train op is only built for
            ``TRAIN``.
        params: Dict with ``'vocab_size'``, ``'embed_size'`` and
            ``'hidden_size'``; ``'learning_rate'`` and ``'optimizer'`` are
            read in ``TRAIN`` mode only.

    Returns:
        A ``tf.estimator.EstimatorSpec`` whose predictions are the pair
        probabilities.
    """
    probs, loss = dual_encoder(
        params['vocab_size'],
        params['embed_size'],
        params['hidden_size'],
        features['context'],
        features['context_len'],
        features['utterance'],
        features['utterance_len'],
        labels)

    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        step_size = params['learning_rate']
        optimizer_name = params['optimizer']

        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=step_size,
            clip_gradients=10.0,
            optimizer=optimizer_name)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=probs,
        loss=loss,
        train_op=train_op)

**Input**

In [4]:
# `tokenizer` function must be defined before restoring the vocabulary object
# (pickle does not serialize functions)
def tokenizer(sentences):
    """Lazily split every sentence on whitespace.

    Args:
        sentences: Iterable of strings.

    Returns:
        A generator yielding one list of tokens per sentence.
    """
    return (text.split() for text in sentences)

class VocabularyAdapter:
    """Read-only view over a saved ``VocabularyProcessor``.

    Exposes only the two numbers the notebook needs: the vocabulary size and
    the maximum document length.
    """

    def __init__(self, vocabulary_bin):
        """Restore the vocabulary processor stored at ``vocabulary_bin``."""
        restore = tf.contrib.learn.preprocessing.VocabularyProcessor.restore
        self._vocab = restore(vocabulary_bin)

    @property
    def vector_length(self):
        """Maximum document length of the restored processor."""
        return self._vocab.max_document_length

    @property
    def size(self):
        """Number of entries in the restored vocabulary."""
        vocabulary = self._vocab.vocabulary_
        return len(vocabulary)


def input_features(vector_length):
    """Return the feature columns describing one serialized example.

    Args:
        vector_length: Length of the ``context`` and ``utterance`` id vectors.

    Returns:
        List of ``int64`` numeric columns, in the order: ``context``,
        ``context_len``, ``utterance``, ``utterance_len``, ``label``.
    """
    column_shapes = (
        ('context', vector_length),
        ('context_len', 1),
        ('utterance', vector_length),
        ('utterance_len', 1),
        ('label', 1),
    )
    return [
        tf.feature_column.numeric_column(
            key=column_key, shape=column_shape, dtype=tf.int64)
        for column_key, column_shape in column_shapes
    ]


def input_fn(name, filenames, features, batch_size, num_epochs=None):
    """Build the batched, shuffled input pipeline for one dataset.

    Args:
        name: Suffix appended to the name of the reader op.
        filenames: File pattern(s) handed to ``read_batch_record_features``.
        features: Feature columns describing the examples; must include a
            ``'label'`` column.
        batch_size: Number of examples per batch.
        num_epochs: Number of passes over the data, forwarded unchanged to
            the reader.

    Returns:
        A ``(features, labels)`` tuple: the dict of parsed feature tensors
        without the ``'label'`` entry, and the ``'label'`` tensor.
    """
    parse_spec = tf.feature_column.make_parse_example_spec(features)
    capacity = 200000 + batch_size * 10
    reader_name = 'read_batch_record_features_' + name

    parsed_batch = tf.contrib.learn.read_batch_record_features(
        file_pattern=filenames,
        features=parse_spec,
        batch_size=batch_size,
        num_epochs=num_epochs,
        randomize_input=True,
        queue_capacity=capacity,
        name=reader_name)

    # The label is stored alongside the inputs; split it off so the remaining
    # dict only holds model features.
    label_batch = parsed_batch.pop('label')

    return parsed_batch, label_batch

**Training**

In [5]:
import os

# Locations of the pre-built vocabulary and of the TFRecord datasets.
HOME_DIR = 'ubuntu'
DATA_DIR = os.path.join(HOME_DIR, 'data')
VOCAB_BIN, TRAIN_TFR, VALID_TFR, TEST_TFR = (
    os.path.join(DATA_DIR, filename)
    for filename in (
        'vocabulary.bin',
        'train.tfrecords',
        'valid.tfrecords',
        'test.tfrecords',
    )
)

# Fail fast on the first missing input file instead of part-way through
# a training run.
for _required_file in (VOCAB_BIN, TRAIN_TFR, VALID_TFR, TEST_TFR):
    if not os.path.isfile(_required_file):
        raise Exception('File not found: {}'.format(_required_file))
del _required_file  # keep the notebook namespace free of the loop variable

In [6]:
# Restore the saved vocabulary, then size the `context` / `utterance` feature
# columns from its maximum document length.
vocab = VocabularyAdapter(VOCAB_BIN)
features = input_features(vocab.vector_length)

In [7]:
# Hyper-parameters; the whole dict is handed to the Estimator as `params`.
# `batch_size` and `num_epochs` are read by the notebook itself: the former by
# the training input function, the latter by the epoch loop.
params = dict(
    vocab_size=vocab.size,
    embed_size=100,
    hidden_size=256,
    learning_rate=0.001,
    optimizer='Adam',
    batch_size=128,
    num_epochs=5,
)


def input_fn_train():
    """Single pass over the training set, batched with params['batch_size']."""
    return input_fn('train', [TRAIN_TFR], features, params['batch_size'], 1)


def input_fn_valid():
    """Single pass over the validation set in batches of 16."""
    return input_fn('valid', [VALID_TFR], features, 16, 1)


def input_fn_test():
    """Single pass over the test set in batches of 16."""
    return input_fn('test', [TEST_TFR], features, 16, 1)

In [8]:
import shutil

def remove_dir(path):
    """Recursively delete ``path`` if it is an existing directory.

    Anything else (a missing path, a regular file) is left untouched.
    """
    if not os.path.isdir(path):
        return
    shutil.rmtree(path)

# Directory handed to the Estimator as `model_dir`.
MODEL_DIR = os.path.join(HOME_DIR, 'model')

# Delete any existing model directory so this run does not pick up files
# left there by a previous run.
remove_dir(MODEL_DIR)

In [9]:
# Wrap `model_fn` in an Estimator; `params` is forwarded to `model_fn` and
# MODEL_DIR is used as the model directory.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir=MODEL_DIR,
    params=params)

# Bare expression: displays the Estimator repr as the cell output.
estimator

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'ubuntu/model', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}


<tensorflow.python.estimator.estimator.Estimator at 0x7fecbe32d048>

In [None]:
%%time

# One `train` call per epoch: `input_fn_train` is built with num_epochs=1 and
# `steps=None` puts no step limit on the call, so each call makes one pass
# over the training set. Every pass is followed by a full pass over the
# validation set. `%time` reports the duration of each call separately.
for epoch in range(1, params['num_epochs'] + 1):
    print('[ Epoch {} ]\n'.format(epoch))
    print('Training...\n')
    %time estimator.train(input_fn_train, steps=None)
    print()
    print('Validation...\n')
    %time estimator.evaluate(input_fn_valid, steps=None)
    print()

[ Epoch 1 ]

Training...

INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into ubuntu/model/model.ckpt.
INFO:tensorflow:loss = 0.97372, step = 1
INFO:tensorflow:global_step/sec: 0.711163
INFO:tensorflow:loss = 0.697674, step = 101 (140.616 sec)
INFO:tensorflow:global_step/sec: 0.711914
INFO:tensorflow:loss = 0.708678, step = 201 (140.467 sec)
INFO:tensorflow:global_step/sec: 0.71188
INFO:tensorflow:loss = 0.677942, step = 301 (140.473 sec)
INFO:tensorflow:global_step/sec: 0.711896
INFO:tensorflow:loss = 0.681646, step = 401 (140.470 sec)
INFO:tensorflow:Saving checkpoints for 428 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.71012
INFO:tensorflow:loss = 0.662147, step = 501 (140.822 sec)
INFO:tensorflow:global_step/sec: 0.711948
INFO:tensorflow:loss = 0.678004, step = 601 (1

INFO:tensorflow:global_step/sec: 0.711821
INFO:tensorflow:loss = 0.521604, step = 6801 (140.485 sec)
INFO:tensorflow:Saving checkpoints for 6833 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.710412
INFO:tensorflow:loss = 0.443584, step = 6901 (140.764 sec)
INFO:tensorflow:global_step/sec: 0.711968
INFO:tensorflow:loss = 0.472203, step = 7001 (140.456 sec)
INFO:tensorflow:global_step/sec: 0.712077
INFO:tensorflow:loss = 0.531635, step = 7101 (140.433 sec)
INFO:tensorflow:global_step/sec: 0.711968
INFO:tensorflow:loss = 0.476889, step = 7201 (140.456 sec)
INFO:tensorflow:Saving checkpoints for 7260 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.710441
INFO:tensorflow:loss = 0.573417, step = 7301 (140.757 sec)
INFO:tensorflow:Saving checkpoints for 7333 into ubuntu/model/model.ckpt.
INFO:tensorflow:Loss for final step: 0.440986.
CPU times: user 2h 48min 52s, sys: 32min 49s, total: 3h 21min 41s
Wall time: 2h 51min 52s
Validation...

INFO:tensorflow:logi

INFO:tensorflow:global_step/sec: 0.711336
INFO:tensorflow:loss = 0.52733, step = 12734 (140.581 sec)
INFO:tensorflow:global_step/sec: 0.711077
INFO:tensorflow:loss = 0.324561, step = 12834 (140.631 sec)
INFO:tensorflow:Saving checkpoints for 12885 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.709867
INFO:tensorflow:loss = 0.384963, step = 12934 (140.871 sec)
INFO:tensorflow:global_step/sec: 0.711233
INFO:tensorflow:loss = 0.443101, step = 13034 (140.602 sec)
INFO:tensorflow:global_step/sec: 0.711285
INFO:tensorflow:loss = 0.475488, step = 13134 (140.590 sec)
INFO:tensorflow:global_step/sec: 0.711268
INFO:tensorflow:loss = 0.503082, step = 13234 (140.595 sec)
INFO:tensorflow:Saving checkpoints for 13312 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.709917
INFO:tensorflow:loss = 0.393502, step = 13334 (140.862 sec)
INFO:tensorflow:global_step/sec: 0.711139
INFO:tensorflow:loss = 0.342618, step = 13434 (140.618 sec)
INFO:tensorflow:global_step/sec: 0.

INFO:tensorflow:loss = 0.395848, step = 18567 (140.906 sec)
INFO:tensorflow:global_step/sec: 0.711194
INFO:tensorflow:loss = 0.439069, step = 18667 (140.608 sec)
INFO:tensorflow:global_step/sec: 0.711148
INFO:tensorflow:loss = 0.295515, step = 18767 (140.618 sec)
INFO:tensorflow:global_step/sec: 0.711185
INFO:tensorflow:loss = 0.279206, step = 18867 (140.610 sec)
INFO:tensorflow:Saving checkpoints for 18937 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.709789
INFO:tensorflow:loss = 0.292128, step = 18967 (140.887 sec)
INFO:tensorflow:global_step/sec: 0.711391
INFO:tensorflow:loss = 0.23167, step = 19067 (140.570 sec)
INFO:tensorflow:global_step/sec: 0.711156
INFO:tensorflow:loss = 0.338451, step = 19167 (140.616 sec)
INFO:tensorflow:global_step/sec: 0.711205
INFO:tensorflow:loss = 0.282506, step = 19267 (140.606 sec)
INFO:tensorflow:Saving checkpoints for 19364 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.709854
INFO:tensorflow:loss = 0.334717, st

INFO:tensorflow:Saving checkpoints for 24562 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.71047
INFO:tensorflow:loss = 0.283662, step = 24600 (140.752 sec)
INFO:tensorflow:global_step/sec: 0.712095
INFO:tensorflow:loss = 0.315545, step = 24700 (140.431 sec)
INFO:tensorflow:global_step/sec: 0.712011
INFO:tensorflow:loss = 0.168355, step = 24800 (140.448 sec)
INFO:tensorflow:global_step/sec: 0.711799
INFO:tensorflow:loss = 0.222568, step = 24900 (140.489 sec)
INFO:tensorflow:Saving checkpoints for 24989 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.710389
INFO:tensorflow:loss = 0.199816, step = 25000 (140.768 sec)
INFO:tensorflow:global_step/sec: 0.711876
INFO:tensorflow:loss = 0.273193, step = 25100 (140.474 sec)
INFO:tensorflow:global_step/sec: 0.711822
INFO:tensorflow:loss = 0.220086, step = 25200 (140.484 sec)
INFO:tensorflow:global_step/sec: 0.712023
INFO:tensorflow:loss = 0.206731, step = 25300 (140.445 sec)
INFO:tensorflow:global_step/sec: 0.

INFO:tensorflow:loss = 0.218051, step = 30433 (140.485 sec)
INFO:tensorflow:global_step/sec: 0.711905
INFO:tensorflow:loss = 0.187258, step = 30533 (140.468 sec)
INFO:tensorflow:Saving checkpoints for 30614 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.710443
INFO:tensorflow:loss = 0.114917, step = 30633 (140.757 sec)
INFO:tensorflow:global_step/sec: 0.712001
INFO:tensorflow:loss = 0.163347, step = 30733 (140.449 sec)
INFO:tensorflow:global_step/sec: 0.712063
INFO:tensorflow:loss = 0.118109, step = 30833 (140.436 sec)
INFO:tensorflow:global_step/sec: 0.711836
INFO:tensorflow:loss = 0.150816, step = 30933 (140.483 sec)
INFO:tensorflow:global_step/sec: 0.711934
INFO:tensorflow:loss = 0.131284, step = 31033 (140.461 sec)
INFO:tensorflow:Saving checkpoints for 31041 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.710555
INFO:tensorflow:loss = 0.280884, step = 31133 (140.735 sec)
INFO:tensorflow:global_step/sec: 0.711906
INFO:tensorflow:loss = 0.191508, s

In [None]:
%%time

# Same epoch loop as the previous cell, run a second time on the same
# `estimator`. NOTE(review): the log below shows parameters being restored
# from a checkpoint in MODEL_DIR, so this appears to continue the earlier
# training rather than start over -- confirm that is intended.
for epoch in range(1, params['num_epochs'] + 1):
    print('[ Epoch {} ]\n'.format(epoch))
    print('Training...\n')
    %time estimator.train(input_fn_train, steps=None)
    print()
    print('Validation...\n')
    %time estimator.evaluate(input_fn_valid, steps=None)
    print()

[ Epoch 1 ]

Training...

INFO:tensorflow:logits.dtype=<dtype: 'float32'>.
INFO:tensorflow:multi_class_labels.dtype=<dtype: 'float32'>.
INFO:tensorflow:losses.dtype=<dtype: 'float32'>.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ubuntu/model/model.ckpt-36665
INFO:tensorflow:Saving checkpoints for 36666 into ubuntu/model/model.ckpt.
INFO:tensorflow:loss = 0.148773, step = 36666
INFO:tensorflow:global_step/sec: 0.711895
INFO:tensorflow:loss = 0.142415, step = 36766 (140.472 sec)
INFO:tensorflow:global_step/sec: 0.712117
INFO:tensorflow:loss = 0.103478, step = 36866 (140.427 sec)
INFO:tensorflow:global_step/sec: 0.711926
INFO:tensorflow:loss = 0.158693, step = 36966 (140.464 sec)
INFO:tensorflow:global_step/sec: 0.712185
INFO:tensorflow:loss = 0.160587, step = 37066 (140.414 sec)
INFO:tensorflow:Saving checkpoints for 37094 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.710778
INFO:tensorflow:loss = 0.220003, step = 37166 (140.69

INFO:tensorflow:loss = 0.0940582, step = 43266 (140.496 sec)
INFO:tensorflow:global_step/sec: 0.711451
INFO:tensorflow:loss = 0.0859758, step = 43366 (140.558 sec)
INFO:tensorflow:global_step/sec: 0.711623
INFO:tensorflow:loss = 0.100843, step = 43466 (140.525 sec)
INFO:tensorflow:Saving checkpoints for 43510 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.710127
INFO:tensorflow:loss = 0.162323, step = 43566 (140.819 sec)
INFO:tensorflow:global_step/sec: 0.711622
INFO:tensorflow:loss = 0.116741, step = 43666 (140.524 sec)
INFO:tensorflow:global_step/sec: 0.711724
INFO:tensorflow:loss = 0.136659, step = 43766 (140.504 sec)
INFO:tensorflow:global_step/sec: 0.711633
INFO:tensorflow:loss = 0.127018, step = 43866 (140.522 sec)
INFO:tensorflow:Saving checkpoints for 43937 into ubuntu/model/model.ckpt.
INFO:tensorflow:global_step/sec: 0.710152
INFO:tensorflow:loss = 0.119566, step = 43966 (140.815 sec)
INFO:tensorflow:Saving checkpoints for 43998 into ubuntu/model/model.ckpt.