**Dual LSTM Encoder for Dialog Response Selection (Retrieval-Based Model)**

http://www.wildml.com/2016/07/deep-learning-for-chatbots-2-retrieval-based-model-tensorflow/

https://github.com/dennybritz/chatbot-retrieval

https://github.com/rkadlec/ubuntu-ranking-dataset-creator

https://arxiv.org/abs/1506.08909

In [1]:
import tensorflow as tf
tf.VERSION

'1.2.0'

**Estimator**

https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/estimator/Estimator

https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/estimator

**Model**

In [2]:
def dual_encoder(vocab_size,
                 embed_size,
                 hidden_size,
                 input_context,
                 input_context_len,
                 input_utterance,
                 input_utterance_len,
                 targets):
    """Build the dual-encoder graph that scores a context/utterance pair.

    Context and utterance share one embedding matrix and one LSTM: both are
    embedded, concatenated along axis 0, run through a single `dynamic_rnn`,
    and the final hidden state is split back into two halves. The score is
    `sigmoid(c^T M r + b)` where `c` and `r` are the two encodings.

    Args:
        vocab_size: number of rows of the embedding matrix.
        embed_size: number of columns of the embedding matrix.
        hidden_size: number of LSTM units; `M` is `hidden_size x hidden_size`.
        input_context: token ids of the context
            (presumably `(batch, max_len)` -- TODO confirm against the input fn).
        input_context_len: true length of each context; flattened to rank 1
            together with `input_utterance_len` before use.
        input_utterance: token ids of the candidate response (same layout
            as `input_context`, since both are concatenated along axis 0).
        input_utterance_len: true length of each utterance.
        targets: labels for the loss, or `None` to build no loss.

    Returns:
        A `(probs, loss)` tuple; `loss` is `None` when `targets` is `None`.
    """
    with tf.variable_scope('embedding'):
        embedding_table = tf.get_variable(
            'embeddings',
            shape=(vocab_size, embed_size),
            initializer=tf.random_uniform_initializer(-0.25, 0.25))

        embedded_context = tf.nn.embedding_lookup(
            embedding_table, input_context, name='context_embed')
        embedded_utterance = tf.nn.embedding_lookup(
            embedding_table, input_utterance, name='utterance_embed')

        # Contexts first, utterances second: the split after the RNN relies
        # on this ordering to recover the two halves.
        stacked_inputs = tf.concat(
            [embedded_context, embedded_utterance], axis=0)
        stacked_lengths = tf.reshape(
            tf.concat([input_context_len, input_utterance_len], axis=0),
            [-1])

    with tf.variable_scope('rnn'):
        lstm_cell = tf.nn.rnn_cell.LSTMCell(
            hidden_size,
            forget_bias=2.0,
            use_peepholes=True,
            state_is_tuple=True)

        # Only the final state is needed; per-step outputs are discarded.
        _, final_state = tf.nn.dynamic_rnn(
            lstm_cell,
            stacked_inputs,
            sequence_length=stacked_lengths,
            dtype=tf.float32)

        context_encoding, utterance_encoding = tf.split(
            final_state.h, num_or_size_splits=2, axis=0)

    with tf.variable_scope('prediction'):
        bilinear_weights = tf.get_variable(
            'M',
            shape=(hidden_size, hidden_size),
            initializer=tf.truncated_normal_initializer())

        projected_context = tf.matmul(context_encoding, bilinear_weights)

        # Per-example dot product expressed as a batched matmul: add a
        # trailing axis to both operands, multiply with the first operand
        # transposed, then drop the trailing axis again.
        projected_column = tf.expand_dims(projected_context, axis=2)
        utterance_column = tf.expand_dims(utterance_encoding, axis=2)
        pair_scores = tf.squeeze(
            tf.matmul(projected_column, utterance_column, transpose_a=True),
            axis=2)

        bias = tf.get_variable(
            'b', shape=(), initializer=tf.zeros_initializer())

        logits = pair_scores + bias
        probs = tf.sigmoid(logits)

    # Without labels (e.g. prediction) no loss ops are added to the graph.
    loss = None
    if targets is not None:
        loss = tf.losses.sigmoid_cross_entropy(
            multi_class_labels=tf.to_float(targets), logits=logits)
        loss = tf.reduce_mean(loss, name="loss")

    return probs, loss


def model_fn(features, labels, mode, params):
    """Estimator `model_fn` wrapping `dual_encoder`.

    Args:
        features: dict with keys 'context', 'context_len', 'utterance' and
            'utterance_len'.
        labels: targets forwarded to `dual_encoder` (may be `None`).
        mode: a `tf.estimator.ModeKeys` value.
        params: dict with 'vocab_size', 'embed_size' and 'hidden_size';
            'learning_rate' and 'optimizer' are read only in TRAIN mode.

    Returns:
        A `tf.estimator.EstimatorSpec` whose predictions are the
        probabilities returned by `dual_encoder`.
    """
    probs, loss = dual_encoder(
        vocab_size=params['vocab_size'],
        embed_size=params['embed_size'],
        hidden_size=params['hidden_size'],
        input_context=features['context'],
        input_context_len=features['context_len'],
        input_utterance=features['utterance'],
        input_utterance_len=features['utterance_len'],
        targets=labels)

    # A training op is only built when training; other modes leave it unset.
    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        learning_rate = params['learning_rate']
        optimizer = params['optimizer']

        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=learning_rate,
            clip_gradients=10.0,
            optimizer=optimizer)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=probs,
        loss=loss,
        train_op=train_op)

**Input**

In [3]:
# `tokenizer` function must be defined before restoring the vocabulary object
# (pickle does not serialize functions)
def tokenizer(sentences):
    """Lazily split each sentence on whitespace.

    Args:
        sentences: iterable of strings.

    Returns:
        A generator yielding one list of tokens per input sentence.
    """
    token_lists = (text.split() for text in sentences)
    return token_lists

class VocabularyAdapter:
    """Single-sentence convenience wrapper around a restored VocabularyProcessor."""

    def __init__(self, vocabulary_bin):
        """Restore the vocabulary processor stored at `vocabulary_bin`.

        NOTE(review): the restore appears to rely on pickle (see the comment
        on `tokenizer`), so only load files from a trusted source.
        """
        processor_cls = tf.contrib.learn.preprocessing.VocabularyProcessor
        self._vocab = processor_cls.restore(vocabulary_bin)

    @property
    def size(self):
        """Number of entries in the underlying vocabulary."""
        vocabulary = self._vocab.vocabulary_
        return len(vocabulary)

    @property
    def vector_length(self):
        """The processor's `max_document_length`."""
        return self._vocab.max_document_length

    def transform(self, sentence):
        """Return the processor's encoding of one sentence."""
        # The processor works on batches; wrap the sentence and take the
        # first (only) result.
        encoded_batch = self._vocab.transform([sentence])
        return next(encoded_batch)

    def tokens(self, sentence):
        """Return the tokens the processor's tokenizer yields for one sentence."""
        tokenized_batch = self._vocab._tokenizer([sentence])
        return next(tokenized_batch)

def _checked_token_count(vocab, sentence, label):
    """Return the token count of `sentence`, rejecting over-long input.

    Raises:
        ValueError: if the sentence has more tokens than `vocab.vector_length`.
            `label` ('Context' / 'Utterance') prefixes the error message.
    """
    token_count = len(vocab.tokens(sentence))
    if token_count > vocab.vector_length:
        raise ValueError(
            '{} is too long (max length {}): {}'.format(
                label, vocab.vector_length, token_count))
    return token_count


def input_fn_raw_data(vocab, context, utterance):
    """Build single-example input tensors from raw text.

    Args:
        vocab: object exposing `tokens(sentence)`, `transform(sentence)` and
            `vector_length` (e.g. a `VocabularyAdapter`).
        context: raw context sentence.
        utterance: raw candidate response sentence.

    Returns:
        A `(features, target)` tuple. `features` maps 'context' and
        'utterance' to constants of shape `(1, vocab.vector_length)` and
        'context_len' / 'utterance_len' to constants of shape `(1, 1)`.
        `target` is always `None`.

    Raises:
        ValueError: if either sentence has more tokens than
            `vocab.vector_length`. `ValueError` is a subclass of `Exception`,
            so callers catching `Exception` keep working.
    """
    # Validate both lengths before encoding anything, so over-long input is
    # reported instead of being passed on to `vocab.transform`.
    context_len = _checked_token_count(vocab, context, 'Context')
    utterance_len = _checked_token_count(vocab, utterance, 'Utterance')

    context_vector = vocab.transform(context)
    utterance_vector = vocab.transform(utterance)

    data = {
        'context': tf.constant(context_vector, shape=(1, vocab.vector_length)),
        'context_len': tf.constant(context_len, shape=(1, 1)),
        'utterance': tf.constant(utterance_vector, shape=(1, vocab.vector_length)),
        'utterance_len': tf.constant(utterance_len, shape=(1, 1)),
    }
    target = None

    return data, target

def input_fn_predict(vocab, context, utterance):
    """Return a zero-argument input function for one context/utterance pair.

    The tensors are not built here; `input_fn_raw_data` runs only when the
    returned callable is invoked.
    """
    def input_fn():
        return input_fn_raw_data(vocab, context, utterance)

    return input_fn

**Inference**

In [4]:
import os

# Directory layout: data and model folders both live under HOME_DIR.
HOME_DIR = 'ubuntu'
DATA_DIR = os.path.join(HOME_DIR, 'data')
MODEL_DIR = os.path.join(HOME_DIR, 'model')
VOCAB_BIN = os.path.join(DATA_DIR, 'vocabulary.bin')

# Fail fast if the artifacts used by the cells below are missing.
vocab_file_present = os.path.isfile(VOCAB_BIN)
model_dir_present = os.path.isdir(MODEL_DIR)
if not vocab_file_present:
    raise Exception('File not found: {}'.format(VOCAB_BIN))
if not model_dir_present:
    raise Exception('Folder not found: {}'.format(MODEL_DIR))
del vocab_file_present, model_dir_present  # keep the notebook namespace unchanged

In [5]:
# Restore the vocabulary saved at VOCAB_BIN.
vocab = VocabularyAdapter(VOCAB_BIN)

# Hyper-parameters read by `model_fn`. They fix the variable shapes, so they
# presumably have to match the values the checkpoint was trained with.
params = dict(
    vocab_size=vocab.size,
    embed_size=100,
    hidden_size=256,
)

In [6]:
# Estimator bound to MODEL_DIR, so predictions restore parameters from the
# checkpoints stored there.
estimator = tf.estimator.Estimator(
    model_fn=model_fn, model_dir=MODEL_DIR, params=params)

estimator

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'ubuntu/model', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000}


<tensorflow.python.estimator.estimator.Estimator at 0x7fee85ca3278>

In [7]:
# Score a response that answers the question about the operating system.
input_fn_0 = input_fn_predict(
    vocab,
    context='what is ubuntu ?',
    utterance='ubuntu is a linux distribution .')

# `predict` yields one result per example; there is exactly one here.
prob = next(estimator.predict(input_fn_0))
prob

INFO:tensorflow:Restoring parameters from ubuntu/model/model.ckpt-1000


array([ 0.71867824], dtype=float32)

In [8]:
# Score an alternative response to the same context for comparison.
input_fn_1 = input_fn_predict(
    vocab,
    context='what is ubuntu ?',
    utterance='ubuntu is a nguni bantu term meaning humanity .')

# `predict` yields one result per example; there is exactly one here.
prob = next(estimator.predict(input_fn_1))
prob

INFO:tensorflow:Restoring parameters from ubuntu/model/model.ckpt-1000


array([ 0.67478782], dtype=float32)