In [340]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
from tensorflow.contrib import skflow
from tensorflow.python.framework import dtypes

In [423]:
# Hyperparameters
MAX_DOCUMENT_LENGTH = 50
MAX_CONTEXT_LENGTH = 160 # From paper
EMBEDDING_SIZE = 50

In [424]:
# Load Data
train_df = pd.read_csv("./data/train_small.csv")
test_df = pd.read_csv("./data/test.csv")
validation_df = pd.read_csv("./data/valid.csv")
y_test = np.zeros(len(test_df))

In [425]:
# Preprocessing
all_sentences = np.append(train_df.Context, train_df.Utterance)
vocab_processor = skflow.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
vocab_processor.fit(all_sentences)
X_train_context = np.array(list(vocab_processor.transform(train_df.Context)))
X_train_utterance = np.array(list(vocab_processor.transform(train_df.Utterance)))
X_train = np.stack([X_train_context, X_train_utterance], axis=1)
y_train = train_df.Label

In [426]:
n_words = len(vocab_processor.vocabulary_)
print("Total words: {}".format(n_words))

Total words: 25094


In [399]:
def rnn_encoder_model(X, y):
    # Split input tensor into separare context and utterance tensor
    context, utterance = tf.split(1, 2, X, name='split')
    context = tf.squeeze(context, [1])
    utterance = tf.squeeze(utterance, [1])
    
    # Embed context and utterance into the same space
    with tf.variable_scope("shared_embeddings") as vs:
        word_vectors_context = skflow.ops.categorical_variable(
            context, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
        word_list_context = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors_context)
        vs.reuse_variables()
        word_vectors_utterance = skflow.ops.categorical_variable(
            utterance, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
        word_list_utterance = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors_utterance)
    
    # Run context and utterance through the same RNN
    with tf.variable_scope("shared_rnn_params") as vs:
        cell = tf.nn.rnn_cell.BasicLSTMCell(100)
        _, encoding_context = tf.nn.rnn(cell, word_list_context, dtype=dtypes.float32)
        vs.reuse_variables()
        _, encoding_utterance = tf.nn.rnn(cell, word_list_utterance, dtype=dtypes.float32)

    with tf.variable_scope("prediction") as vs:
        W = tf.get_variable("W",
                            shape=[encoding_context.get_shape()[1], encoding_utterance.get_shape()[1]],
                            initializer=tf.random_normal_initializer())
        b = tf.get_variable("b", [1])
        
        # We can interpret this is a "Generated context"
        generated_context = tf.matmul(encoding_utterance, W)
        # Batch multiply contexts and utterances (batch_matmul only works with 3-d tensors)
        generated_context = tf.expand_dims(generated_context, 2)
        encoding_context = tf.expand_dims(encoding_context, 2)
        scores = tf.batch_matmul(generated_context, encoding_context, True) + b
        # Go from [15,1,1] to [15,1]: We want a vector of 15 scores
        scores = tf.squeeze(scores, [2])
        # Convert scores into probabilities
        probs = tf.sigmoid(scores)
        
        # Calculate loss
        loss = tf.contrib.losses.logistic(scores, tf.expand_dims(y, 1))
        
    return [probs, loss]

In [429]:
classifier = tf.contrib.learn.TensorFlowEstimator(model_fn=rnn_encoder_model, n_classes=1, continue_training=True)

In [430]:
while True:
    classifier.fit(X_train, y_train, logdir='/tmp/tf/dual_lstm_chatbot')

Step #99, avg. train loss: 12.44238
Step #199, avg. train loss: 0.70195
Step #99, avg. train loss: 0.69790
Step #199, avg. train loss: 0.69531
Step #99, avg. train loss: 0.69219
Step #199, avg. train loss: 0.68963
Step #99, avg. train loss: 0.68250
Step #199, avg. train loss: 0.67507
Step #99, avg. train loss: 0.65989
Step #199, avg. train loss: 0.64811
Step #99, avg. train loss: 0.63208
Step #199, avg. train loss: 0.61260
Step #99, avg. train loss: 0.58543
Step #199, avg. train loss: 0.55884
Step #99, avg. train loss: 0.54358
Step #199, avg. train loss: 0.50851
Step #99, avg. train loss: 0.50628
Step #199, avg. train loss: 0.46157
Step #99, avg. train loss: 0.42391
Step #199, avg. train loss: 0.39322
Step #99, avg. train loss: 0.38274
Step #199, avg. train loss: 0.33803
Step #99, avg. train loss: 0.29959
Step #199, avg. train loss: 0.27522
Step #99, avg. train loss: 0.26006
Step #199, avg. train loss: 0.23499
Step #99, avg. train loss: 0.21986
Step #199, avg. train loss: 0.22102
Step 

KeyboardInterrupt: 

In [352]:
# Evaluation
def evaluate_recall(y, y_labels, n=1):
    num_examples = float(len(y))
    num_correct = 0
    for predictions, label in zip(y, y_labels):
        if label in predictions[:n]:
            num_correct += 1
    return num_correct/num_examples

In [418]:
def predict_rnn(context, utterances, n=1):
    cvec = np.array(list(vocab_processor.transform([context])))
    input_vectors = []
    for u in utterances:
        uvec = np.array(list(vocab_processor.transform([u])))
        # print(uvec.shape)
        stacked = np.stack([cvec, uvec], axis=1)
        input_vectors.append(stacked)
    batch = np.vstack(input_vectors)
    result = classifier.predict_proba(batch)[:,0]
    print(".", end="")
    return np.argsort(result)[::-1]

In [419]:
predict_rnn(test_df.Context[1], test_df.iloc[1,1:].values)

.

array([6, 0, 5, 2, 8, 7, 4, 3, 9, 1])

In [431]:
# Evaluate RNN predictor
y_test = np.zeros(len(test_df))
y = [predict_rnn(test_df.Context[x], test_df.iloc[x,1:].values) for x in range(100)]
print("")
for n in [1, 2, 5, 10]:
    print("Recall @ ({}, 10): {:g}".format(n, evaluate_recall(y, y_test, n)))

....................................................................................................
Recall @ (1, 10): 0.15
Recall @ (2, 10): 0.25
Recall @ (5, 10): 0.5
Recall @ (10, 10): 1
