In [1]:
import tensorflow as tf
# Report the installed TensorFlow version (this notebook targets the 0.10 API).
print('TensorFlow version: %s' % tf.__version__)

TensorFlow version: 0.10.0


# Character Level RNN on Startup Quotes

## Introduction

Inspired by [@karpathy](https://github.com/karpathy)'s char-rnn but written in TensorFlow.

In this example notebook we will be training on startup quotes sourced from this tsv: https://github.com/startuptxt/startuptxt.github.io/blob/master/quotes.tsv

## Prepare the Data

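### Download

The next cell reads the quotes from a local file, so save the tsv linked above as `quotes.tsv` in the notebook's working directory before running it.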

In [2]:
# Read every row of the quotes file into memory.
# The `with` block closes the handle even if reading raises part-way through.
with open('./quotes.tsv', 'rU') as f:
    lines = f.readlines()

In [3]:
# Keep only the text before the first tab on each row (the quote itself).
quotes = [line.partition('\t')[0] for line in lines]
# Preview the first five quotes.
print(quotes[:5])

['If you want to teach people a new way of thinking, don\xe2\x80\x99t bother trying to teach them. Instead, give them a tool, the use of which will lead to new ways of thinking.', 'When I am working on a problem, I never think about beauty\xe2\x80\xa6\xe2\x80\xa6.. but when I have finished, if the solution is not beautiful, I know it is wrong.', 'Humans beings always do the most intelligent thing\xe2\x80\xa6after they\xe2\x80\x99ve tried every stupid alternative and none of them have worked.', 'I just invent, then wait until man comes around to needing what I\xe2\x80\x99ve invented.', 'I\xe2\x80\x99m not a genius. I\xe2\x80\x99m just a tremendous bundle of experience.']


In [4]:
# Concatenate all quotes into a single corpus string, one quote per line.
raw_input_data = str.join('\n', quotes)

### Pre-process

One-hot encoding for each character.

In [5]:
# Create one-hot mapping by tricking sklearn's DictVectorizer
# Build the character <-> one-hot mapping by (ab)using sklearn's DictVectorizer.
from sklearn.feature_extraction import DictVectorizer

# Every distinct character that occurs in the corpus.
unique_chars = set(raw_input_data)
# One single-entry dict per character, e.g. [{'a': 1}, {'b': 1}, {'c': 1}, ...]
D = [{char: 1} for char in unique_chars]

v = DictVectorizer(sparse=False)
# Fitting assigns each character its own feature column; kept as the last
# expression so the fitted vectorizer is displayed as the cell output.
v.fit(D)

DictVectorizer(dtype=<type 'numpy.float64'>, separator='=', sort=True,
        sparse=False)

In [6]:
# Sanity check: the one-hot row produced for the character 'I'.
v.transform([{'I': 1}])

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])

In [7]:
def encode_string(string):
    """Return a list with one one-hot vector per character of `string`.

    Each character is looked up through the fitted DictVectorizer `v`;
    `transform` returns a single-row 2-D array, so row 0 is the vector.
    """
    return [v.transform({char: 1})[0] for char in string]

In [8]:
# Encode a short sample that also contains a newline character.
sample_text = "If you\nwant"
q0_enc = encode_string(sample_text)
# The first two vectors are the one-hot encodings of "I" and "f".
q0_enc[:2]

[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]),
 array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  

In [9]:
def decode_string(string):
    """Turn a sequence of one-hot vectors back into the text they encode.

    `string` is an iterable of 1-D arrays such as those returned by
    `encode_string`.
    """
    chars = []
    for one_hot in string:
        # inverse_transform takes a 2-D array and returns one dict per row,
        # e.g. {'I': 1}; its only key is the decoded character.
        recovered = v.inverse_transform(one_hot.reshape(1, -1))[0]
        chars.append(recovered.keys()[0])
    return ''.join(chars)

In [10]:
decode_string(q0_enc)

'If you\nwant'

### Inputs and Placeholders

In [11]:
# Training Parameters
learning_rate = 0.001
training_iters = 1000

In [12]:
# Network Parameters
n_input = len(unique_chars) # Length of one-hot encoded vectors i.e. which is also the number of unique characters
n_output = n_input # The characters we feed the NN are the same characters it will be outputting
n_steps = 100 # Number of previous characters to look at
n_hidden = 100 # Number of nodes in each hidden layer
n_layers = 1 # Number of hidden layers

In [13]:
# tf Graph input
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_output])

# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_output]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_output]))
}

## Build the Graph

### Inference

In [15]:
def RNN(x, weights, biases):
    """Build a single-layer LSTM over `x` and return one score vector per example.

    x       -- tensor of shape (batch_size, n_steps, n_input)
    weights -- dict whose 'out' entry has shape (n_hidden, n_output)
    biases  -- dict whose 'out' entry has shape (n_output,)

    The result is the un-normalised (linear) projection of the LSTM output at
    the last time step.
    """
    # tf.nn.rnn expects a Python list of n_steps tensors, each of shape
    # (batch_size, n_input), so go time-major, flatten, then split per step.
    time_major = tf.transpose(x, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, n_input])
    step_inputs = tf.split(0, n_steps, flattened)

    cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
    outputs, states = tf.nn.rnn(cell, step_inputs, dtype=tf.float32)

    # Only the output of the final time step feeds the projection.
    last_output = outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']


pred = RNN(x, weights, biases)

### Loss & Optimizer

In [16]:
# Softmax cross-entropy between the predicted scores and the one-hot targets,
# averaged over the batch.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(pred, y)
cost = tf.reduce_mean(per_example_loss)

# Adam minimises the mean loss.
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = adam.minimize(cost)

## Train the Model

In [17]:
# start our tensorflow session
sess = tf.Session()
# initialize the varsriables we defined above
init = tf.initialize_all_variables()
sess.run(init)

In [None]:
# Show only the start of the corpus; echoing the whole string would flood the
# cell output and bloat the saved notebook.
raw_input_data[:500]

In [None]:
# Imports used by this cell only; nothing above brings them into scope.
import numpy as np
from tqdm import tqdm

batch_size = 32  # number of (window, next character) examples per optimisation step

# A window starting at s covers characters s .. s + n_steps - 1 and is followed
# by the target character at s + n_steps, so s must stay below this bound.
n_window_starts = len(raw_input_data) - n_steps
if n_window_starts <= 0:
    raise ValueError('raw_input_data must contain more than n_steps characters')

for step in tqdm(range(training_iters)):
    # Randomly select positions in the entire training text.
    starts = [int(s) for s in np.random.randint(0, n_window_starts, size=batch_size)]
    # Inputs: the n_steps characters of each window, one-hot encoded,
    # giving shape (batch_size, n_steps, n_input).
    batch_x = np.array([encode_string(raw_input_data[s:s + n_steps]) for s in starts])
    # Targets: the character that follows each window, shape (batch_size, n_output).
    batch_y = np.array([encode_string(raw_input_data[s + n_steps])[0] for s in starts])
    # Run optimization op (backprop)
    sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})