In [1]:
import tensorflow as tf
print 'TensorFlow version:', tf.__version__
import numpy as np
from tqdm import tqdm

TensorFlow version: 0.10.0


# Character Level RNN on Startup Quotes

## Introduction

Inspired by [@karpathy](https://github.com/karpathy)'s char-rnn but written in TensorFlow.

In this example notebook we will be training on startup quotes sourced from this tsv: https://github.com/startuptxt/startuptxt.github.io/blob/master/quotes.tsv

## Prepare the Data

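### Download

Download `quotes.tsv` from the link above and save it next to this notebook; the next cell reads it from `./quotes.tsv`.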

In [2]:
# Read every line of the local quotes file. The `with` block closes the file
# even if reading fails part-way through.
with open('./quotes.tsv', 'rU') as f:
    lines = f.readlines()

In [3]:
# Keep only the first tab-separated column of every line
quotes = [line.split('\t')[0] for line in lines]
# Peek at the first five entries
print quotes[:5]

['If you want to teach people a new way of thinking, don\xe2\x80\x99t bother trying to teach them. Instead, give them a tool, the use of which will lead to new ways of thinking.', 'When I am working on a problem, I never think about beauty\xe2\x80\xa6\xe2\x80\xa6.. but when I have finished, if the solution is not beautiful, I know it is wrong.', 'Humans beings always do the most intelligent thing\xe2\x80\xa6after they\xe2\x80\x99ve tried every stupid alternative and none of them have worked.', 'I just invent, then wait until man comes around to needing what I\xe2\x80\x99ve invented.', 'I\xe2\x80\x99m not a genius. I\xe2\x80\x99m just a tremendous bundle of experience.']


In [4]:
# Concatenate all quotes into a single string, separated by newlines
raw_input_string = '\n'.join(quotes)

### Pre-process

One-hot encoding for each character.

In [5]:
# Create one-hot mapping by tricking sklearn's DictVectorizer
from sklearn.feature_extraction import DictVectorizer
v = DictVectorizer(sparse=False)
unique_chars = set(raw_input_string)
# Build a list of dicts that looks like [{'a':1}, {'b':1}, {'c':1}, ...]
D = [{char:1} for char in unique_chars]
# Fit on one single-key dict per unique character
v.fit(D)

DictVectorizer(dtype=<type 'numpy.float64'>, separator='=', sort=True,
        sparse=False)

In [6]:
def encode_string(string):
    """Encode every character of ``string`` with the fitted vectorizer ``v``.

    Parameters
    ----------
    string : str
        Text to encode, one output row per character.

    Returns
    -------
    numpy.ndarray
        2-D array with ``len(string)`` rows, each row being the vector that
        ``v.transform`` produces for ``{char: 1}``. An empty ``string`` yields
        an empty 1-D array.
    """
    if len(string) == 0:
        # Keep the historical result for empty input
        return np.array([])
    # Vectorize the whole string in a single transform call instead of one
    # call per character; the resulting rows are the same.
    return np.array(v.transform([{char: 1} for char in string]))

In [7]:
# Encode a short sample string (it contains a newline) to try out the encoder
example_enc = encode_string("If you\nwant")
# The string "If" after one-hot encoding
example_enc[:2]

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0

In [8]:
# (number of characters in the sample, length of each encoded vector)
example_enc.shape

(11, 91)

In [9]:
def decode_string(string):
    """Decode a sequence of encoded character vectors back into text.

    Parameters
    ----------
    string : iterable of array-like
        One encoded vector per character, e.g. the rows of the array returned
        by ``encode_string``. Despite the name, this is not a ``str``.

    Returns
    -------
    str
        The decoded characters joined together; ``''`` for an empty input.

    Raises
    ------
    IndexError
        If ``v.inverse_transform`` reports no feature for a vector, so there
        is no character to take.
    """
    out = []
    for char in string:
        # inverse_transform wants a 2-D input; it returns one dict per row,
        # e.g. {'I': 1}
        char_dict = v.inverse_transform(np.asarray(char).reshape(1, -1))[0]
        # list(...)[0] takes the first key on both Python 2 and Python 3
        out.append(list(char_dict)[0])
    return ''.join(out) # join the characters together to form the decoded string

In [10]:
# Round-trip check: decode the encoded sample back into text
decode_string(example_enc)

'If you\nwant'

### Inputs and Placeholders

In [11]:
# Training Parameters
learning_rate = 0.001 # learning rate handed to the Adam optimizer
training_iters = 10000 # number of training batches to run
batch_size = 100 # number of examples drawn per training batch

In [12]:
# Network Parameters
n_input = len(unique_chars) # Length of each one-hot encoded vector, i.e. the number of unique characters
n_output = n_input # The characters we feed the NN are the same characters it will be outputting
n_steps = 100 # Number of previous characters to look at
n_hidden = 200 # Number of nodes in each hidden layer
n_layers = 3 # Number of hidden layers

In [13]:
# tf Graph input
# x: batches of n_steps encoded characters; y: the encoded character that follows each sequence
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_output])

# Define weights
# Output layer mapping an n_hidden-sized vector to n_output scores, randomly initialized
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_output]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_output]))
}

## Build the Graph

### Inference

In [14]:
# Define a lstm cell with tensorflow
cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
# Stack n_layers of them into one multi-layer cell
# NOTE(review): the same cell object is repeated n_layers times - confirm this is the intended idiom for the installed TensorFlow version
cell = tf.nn.rnn_cell.MultiRNNCell([cell] * n_layers, state_is_tuple=True)

In [15]:
# Get lstm cell outputs
outputs, states = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
# Swap the first two axes
# (assumes dynamic_rnn returned (batch, time, n_hidden), making time the leading axis - TODO confirm)
outputs = tf.transpose(outputs, [1, 0, 2])
# Keep only the final slice along the new leading axis
last = tf.gather(outputs, int(outputs.get_shape()[0]) - 1)

In [16]:
# Linear activation, using rnn inner loop last output
# (no softmax here: pred is passed as logits to the cross-entropy loss)
pred = tf.matmul(last, weights['out']) + biases['out']

### Loss & Optimizer

In [17]:
# Mean softmax cross-entropy between the predictions and the targets y
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
# Training op: one Adam step that minimizes the cost
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


## Train the Model

In [18]:
# start our tensorflow session
sess = tf.Session()
# initialize the variables we defined above
init = tf.initialize_all_variables()
sess.run(init)

In [19]:
# define a function that outputs two arrays:
# 1. batch_x - an array with shape (n, t, m) where n=batch_size, t=number of characters aka time-steps, and m=length of one-hot character vector
# 2. batch_y - an array with shape (n, m) where n=batch_size and m=length one-hot character vector
def next_batch(string, batch_size=75, n_steps=50):
    """Sample a random batch of (character window, next character) pairs.

    Parameters
    ----------
    string : str
        Text to sample from; must be longer than ``n_steps``.
    batch_size : int
        Number of examples to draw.
    n_steps : int
        Number of characters in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``batch_x`` holds the encoded windows and ``batch_y`` the encoded
        character that directly follows each window.

    Raises
    ------
    ValueError
        If ``string`` is too short to hold a window plus its target.
    """
    if len(string) <= n_steps:
        raise ValueError(
            "string must be longer than n_steps (%d) to sample a batch, got length %d"
            % (n_steps, len(string)))
    batch_x = []
    batch_y = []
    # Target positions are drawn from [n_steps, len(string)), so a full
    # n_steps-long window always fits in front of each one.
    i_ys = np.random.randint(n_steps, len(string), size=batch_size) # e.g. i_ys=np.array([25, 50, 75])
    for i_y in i_ys:
        string_x = string[i_y - n_steps:i_y] # the n_steps characters before the target
        string_y = string[i_y] # the target character
        batch_x.append(encode_string(string_x))
        batch_y.append(encode_string(string_y)[0])
    return np.array(batch_x), np.array(batch_y)

In [20]:
# Test the next_batch function
# Draw a single example with a 75-character window
batch_x, batch_y = next_batch(raw_input_string, 1, 75)
print batch_x.shape
# Decode the window back to text to eyeball it
print repr(decode_string(batch_x[0]))
print batch_y.shape
# Decode the target character that follows the window
print repr(decode_string(batch_y))

(1, 75, 91)
' strategic moves or their intuitive business sense or a variety of other se'
(1, 91)
'l'


In [21]:
# Run training_iters optimization steps, each on a freshly sampled batch; tqdm shows progress
for i in tqdm(range(training_iters)):
#for i in range(training_iters):
    # Get the next batch of training data
    batch_x, batch_y = next_batch(raw_input_string, batch_size, n_steps)
    # Run optimization op (backprop)
    sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})

100%|██████████| 10000/10000 [39:09<00:00,  4.25it/s]


## Evaluate the Model

In [22]:
# A prediction is correct when the highest-scoring index of pred matches that of y
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
# Accuracy is the fraction of correct predictions in the fed batch
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [23]:
# Sample 250 examples and measure next-character accuracy on them
# NOTE(review): this batch is drawn from raw_input_string, the same string the
# training batches are sampled from, so it is not a held-out test set.
test_x, test_y = next_batch(raw_input_string, 250, n_steps)
print "Testing Accuracy:", \
    sess.run(accuracy, feed_dict={x: test_x, y: test_y})

Testing Accuracy: 0.78


## Predict some Text

In [32]:
# Number of characters to generate
n_chars = 1000

In [33]:
# Op that returns, for each input sequence, the index of the highest-scoring output
op_pred_char = tf.argmax(pred, 1)

In [34]:
# Build initial seed string
i = np.random.randint(0, 5000)
seed_string = raw_input_string[i:(i+n_steps)]
print repr(seed_string)

' in business is not to think too much about making it.\nIn the long run people are going to buy the c'


In [35]:
# Loop through the following steps:
# 1. Grab the last t characters
# 2. Predict and append the next letter
# 3. Repeat steps 1-2 until the limit is reached
for i in tqdm(range(n_chars)):
    # Encode last t characters of seed string
    new_seed = seed_string[-n_steps:]
    enc_seed = np.array([encode_string(new_seed)]) # add a leading batch axis of size 1

    # Predict using seed string
    pred_index = op_pred_char.eval(feed_dict={x:enc_seed}, session=sess)
    # Build a one-hot vector for the predicted index so decode_string can map it back to a character
    char_vec = np.zeros([1, enc_seed.shape[2]])
    char_vec[0][pred_index] = 1
    pred_char = decode_string(char_vec)

    # Print last n_steps chars and predicted char
    #print "{}, {}".format(repr(new_seed), repr(pred_char))

    # Append the decoded character to the end of the seed string
    seed_string+=pred_char

# What's the final string?
print seed_string

100%|██████████| 1000/1000 [00:34<00:00, 28.62it/s]

 in business is not to think too much about making it.
In the long run people are going to buy the company is that the right is to start to get things.
I think it is a beginning; I took a simple by dousn’t make a lot of progress.
People don’t need to be brilliant to start a business and a team to be a successful entrepreneurship are doing about the problem or now to start the inside than entrepreneurship is a good jobking and are against the first step.
The best time to be a waser drises—that is work.
I think a lot of with the small. No big things in the world.
I think a business a special and destrobted to accomplish a something, and that’s a successful entrepreneurship are doing about the problem or now to start the inside than entrepreneurship is a good jobking and are against the first step.
The best time to be a waser drises—that is work.
I think a lot of with the small. No big things in the world.
I think a business a special and destrobted to accomplish a something, and that’s a


