Exercise 7
===
Create a RNN that can learn a Reber grammer (http://www.willamette.edu/~gorr/classes/cs449/reber.html)

In [1]:
import random

start_symbol = "b"
end_symbol = "e"

symbols = [start_symbol, "t", "p", "s", "x", "v", end_symbol]

def get_next_symbols(previous_symbol, current_symbol):
    if current_symbol == None and previous_symbol == None:
        return [start_symbol]
    elif current_symbol == start_symbol:
        return ["t", "p"]
    elif current_symbol == "t":
        if previous_symbol == start_symbol:
            return ["s", "x"]
        elif previous_symbol in ["p", "x", "t"]:
            return ["t", "v"]
        else:
            raise Exception("Invalid grammar.")
    elif current_symbol == "p":
        if previous_symbol == start_symbol:
            return ["t", "v"]
        elif previous_symbol == "v":
            return ["x", "s"]
        else:
            raise Exception("Invalid grammar.")
    elif current_symbol == "s":
        if previous_symbol in ["t", "s"]:
            return ["x", "s"]
        elif previous_symbol in ["x", "p"]:
            return [end_symbol]
        else:
            raise Exception("Invalid grammar.")
    elif current_symbol == "x":
        if previous_symbol in ["t", "s"]:
            return ["x", "s"]
        elif previous_symbol in ["x", "p"]:
            return ["t", "v"]
        else:
            raise Exception("Invalid grammar.")
    elif current_symbol == "v":
        if previous_symbol in ["t", "x", "p"]:
            return ["p", "v"]
        elif previous_symbol == "v":
            return [ end_symbol ]
        else:
            raise Exception("Invalid grammar.")
    elif current_symbol == end_symbol:
        return []
    else:
        raise Exception("Invalid symbols: %s and %s." % (previous_symbol, current_symbol))

def get_next_symbols_for_string(reber_str):
    previous_symbol = reber_str[-2] if len(reber_str) >= 2 else None
    current_symbol = reber_str[-1] if len(reber_str) >= 1 else None
    return get_next_symbols(previous_symbol, current_symbol)

def create_reber_string():
    reber_str = ""
    while not reber_str.endswith(end_symbol):
        reber_str += random.choice(get_next_symbols_for_string(reber_str))
    return reber_str

def is_valid_reber_string(value):
    index = 0
    while index < len(value):
        current = value[index]
        next_symbols = get_next_symbols_for_string(value[:index] if index != 0 else "")
        if current not in next_symbols:
            return False
        index += 1
    return True

In [11]:
import numpy as np
import tensorflow as tf

seed = 42
np.random.seed(seed)
tf.set_random_seed(seed)

def create_reber_strings(size):
    strings = []
    while len(strings) < size:
        new_string = create_reber_string()
        if new_string not in strings:
            strings.append(new_string)
    return strings

def invalidate_reber_string(value):
    while is_valid_reber_string(value):
        char_list = list(value)
        char_list[random.randint(0, len(value) - 1)] = random.choice(symbols)
        value = "".join(char_list)
    return value

def invalidate_reber_strings(strings):
    return [invalidate_reber_string(x) for x in strings]


def prep_strings_for_model(strings, sequence_length):
    return np.array([np.pad(x, (0, sequence_length - len(x) % sequence_length), 'constant') for x in strings])

def generate_dataset(size, error_ratio = 0.5):
    if size % 2 != 0:
        raise Exception("size must be a multiple of 2.")
    correct_strings = create_reber_strings(int(size * error_ratio))
    incorrect_strings = invalidate_reber_strings(correct_strings)
    strings = np.array([list(map(lambda a: float(ord(a)), x)) for x in correct_strings + incorrect_strings])
    correct_val = 1.0
    incorrect_val = 0.0
    targets = np.array(([ correct_val ] * len(correct_strings)) + ([ incorrect_val ] * len(incorrect_strings)))
    indices = np.random.permutation(size)
    max_sequence_length = max([len(x) for x in strings])
    strings = prep_strings_for_model(strings, max_sequence_length)
    print(strings[0])
    return strings[indices], targets[indices]

train_size = 100
validation_size = 20
test_size = 20

all_strings, all_targets = generate_dataset(train_size + validation_size + test_size)
train_X = all_strings[:train_size]
train_y = all_targets[:train_size]
validation_X = all_strings[train_size:train_size+validation_size]
validation_y = all_targets[train_size:train_size+validation_size]
test_X = all_strings[train_size+validation_size:train_size+validation_size+test_size]
test_y = all_targets[train_size+validation_size:train_size+validation_size+test_size]

print("train size", len(train_X), "validation size", len(validation_X), "test size", len(test_X))

max_sequence_length = train_X.shape[1]
print("The max sequence length is", max_sequence_length)

[  98.  116.  115.  115.  115.  115.  115.  120.  120.  118.  112.  115.
  101.    0.    0.    0.    0.    0.    0.    0.    0.    0.]
train size 100 validation size 20 test size 20


IndexError: tuple index out of range

In [3]:
import math
from sklearn.base import BaseEstimator, ClassifierMixin

def create_next_batch_fn(data, targets, batch_size):
    assert len(data) == len(targets)
    current_batch = 0
    def next_batch():
        nonlocal current_batch
        i = current_batch
        #print(current_batch)
        current_batch = (current_batch + batch_size) % len(images)
        return data[i:i+batch_size], targets[i:i+batch_size]
    return next_batch

class RnnClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_steps, learning_rate=0.001, n_neurons=2):
        self.n_steps = n_steps
        self.learning_rate = learning_rate
        self.n_neurons = n_neurons
        self._build_graph()

    def _build_graph(self):
        n_inputs = 1
        self.n_output = 1
        self.batch_size = 20
        
        with tf.device("/gpu:0"):
            self.x = tf.placeholder(tf.float32, shape=(None, self.n_steps, n_inputs), name="input")
            self.y = tf.placeholder(tf.int32, shape=(None), name="y")

            with tf.name_scope("rnn"):
                cell = tf.contrib.rnn.OutputProjectionWrapper(
                    tf.contrib.rnn.BasicRNNCell(num_units=self.n_neurons, activation=tf.nn.relu),
                    output_size=self.n_output)
                outputs, states = tf.nn.dynamic_rnn(cell, self.x, dtype=tf.float32)
                #self.evaluation = tf.nn.logistic(outputs)

            with tf.name_scope("loss"):
                self.loss = tf.reduce_mean(tf.square(outputs - y))

            with tf.name_scope("training"):
                optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
                self.training_op = optimizer.minimize(self.loss)

            with tf.name_scope("eval"):
                correctness = None
                self.accuracy = tf.reduce_mean(tf.cast(correctness, tf.float32)) * 100
            
        self.init = tf.global_variables_initializer()

    def fit(self, X, y, valid_X, valid_y, epochs = 20):
        saver = tf.train.Saver()

        interim_checkpoint_path = "./checkpoints/reber_rnn_model.ckpt"
        early_stopping_checkpoint_path = "./checkpoints/reber_rnn_model_early_stopping.ckpt"

        from datetime import datetime

        now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        root_logdir = "tf_logs"
        log_dir = "{}/run-{}/".format(root_logdir, now)

        loss_summary = tf.summary.scalar('loss', self.loss)
        accuracy_summary = tf.summary.scalar("accuracy", self.accuracy)
        summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        file_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())
        
        n_batches = int(np.ceil(len(X) // self.batch_size))
        next_batch = create_next_batch_fn(X, y, self.batch_size)
            
        early_stopping_check_frequency = self.batch_size // 4
        early_stopping_check_limit = self.batch_size * 2

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        self.session = sess
        self.init.run()
        #saver.restore(sess, interim_checkpoint_path)

        best_validation_acc = 0.0
        best_validation_step = 0
        for epoch in range(epochs):
            print("epoch", epoch)
            for batch_index in range(n_batches):
                step = epoch * n_batches + batch_index
                X_batch, y_batch = next_batch()
                if batch_index % 10 == 0:
                    summary_str = summary_op.eval(session=sess, feed_dict={self.x: X_batch, self.y: y_batch})
                    file_writer.add_summary(summary_str, step)
                t, l, a = sess.run([self.training_op, self.loss, self.accuracy], feed_dict={self.x: X_batch, self.y: y_batch})
                if batch_index % 10 == 0: print("loss:", l, "train accuracy:", a)
                # Early stopping check
                if batch_index % early_stopping_check_frequency == 0:
                    validation_acc = self.prediction_accuracy(valid_X, valid_y)
                    print("validation accuracy", validation_acc)
                    if validation_acc > best_validation_acc:
                        saver.save(sess, early_stopping_checkpoint_path)
                        best_validation_acc = validation_acc
                        best_validation_step = step
                    elif step >= (best_validation_step + early_stopping_check_limit):
                        print("Stopping early during epoch", epoch)
                        break
            else:
                continue
            break
            save_path = saver.save(sess, interim_checkpoint_path)
        saver.restore(sess, early_stopping_checkpoint_path)
        save_path = saver.save(sess, "./checkpoints/reber_rnn_model_final.ckpt")
            
    def predict_proba(self, X):
        dataset_size = X.shape[0]
        #print "dataset_size: ", dataset_size, " batch_size: ", batch_size
        predictions = np.ndarray(shape=(dataset_size, self.n_output), dtype=np.float32)
        steps = int(math.ceil(dataset_size / self.batch_size))
        #print "steps: ", steps
        for step in range(steps):
            offset = (step * self.batch_size)
            #print "offset ", offset
            data_end_index = min(offset + self.batch_size, dataset_size)
            batch_data = X[offset:data_end_index, :]
            feed_dict = {
                self.x: batch_data
            }
            predictions[offset:data_end_index, :] = self.evaluation.eval(session=self.session, feed_dict=feed_dict)
        #print("predict_proba", predictions)
        return predictions

    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)
    
    def _prediction_accuracy(self, predictions, labels):
        return (100.0 * np.sum(np.argmax(predictions, 1) == labels)
              / predictions.shape[0])
    
    def prediction_accuracy(self, X, y):
        predictions = self.predict_proba(X)
        return self._prediction_accuracy(predictions, y)

In [4]:
rnn_classifier = RnnClassifier(max_sequence_length, n_neurons=2)

NameError: name 'max_sequence_length' is not defined