### Char-RNN Text Generation

The following recurrent neural network generates Shakespeare-like text. We feed text from `data/tiny-shakespeare.txt` into the network character by character, and at each time step we want to predict the *next* character (see image below). After training our network on Shakespearean data, we sample from it by providing it with a random character and letting it predict the next one. We then feed this predicted character back into the network and let it predict the following character, and so on. With luck, our network will output an award-winning play!

To run the code below, select the cell it is written in and press `Shift + Enter`.

![](rnn-data/char-rnn.png)



In [None]:
import numpy as np
import theano
import theano.tensor as T
from collections import OrderedDict

# Load data
# A context manager closes the file handle as soon as the text has been read.
with open('data/tiny-shakespeare.txt', 'r') as data_file:
    data = data_file.read()

# Vocabulary: every distinct character in the corpus.
# Sorting gives each character the same index on every run; the iteration
# order of a set of strings can differ between interpreter sessions because
# string hashes are randomised per process.
chars = sorted(set(data))
data_size, chars_size = len(data), len(chars)

# Print info
print('Data has {} characters, {} unique.'.format(data_size, chars_size))

# Conversion dicts (character -> index and index -> character)
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# A method to create a trainset X: inputs, Y: targets
def create_trainset(text, seq_length, vocab=None):
    """Split ``text`` into consecutive (input, target) index sequences.

    The text is cut into non-overlapping chunks of ``seq_length`` characters.
    For every chunk, the input is the chunk itself and the target is the same
    window shifted one character to the right, i.e. the "next character" at
    every position.

    Args:
        text: The string to split.
        seq_length: Number of characters per sequence.
        vocab: Optional character -> index mapping. When omitted, the
            module-level ``char_to_ix`` dict is used.

    Returns:
        A tuple ``(X, Y)`` of two equally long lists. Each element is a list
        of ``seq_length`` integer indices.

    Raises:
        KeyError: If ``text`` contains a character missing from the mapping.
        ZeroDivisionError: If ``seq_length`` is 0.
    """
    mapping = char_to_ix if vocab is None else vocab

    # Each chunk needs one more character after it to serve as the target of
    # its last position, hence ``len(text) - 1``. This keeps every target
    # sequence exactly as long as its input, even when the text length is an
    # exact multiple of seq_length.
    n_chunks = (len(text) - 1) // seq_length

    X = []
    Y = []
    for k in range(n_chunks):
        start = k * seq_length
        stop = start + seq_length
        X.append([mapping[c] for c in text[start:stop]])          # inputs to the RNN
        Y.append([mapping[c] for c in text[start + 1:stop + 1]])  # the targets it should be outputting

    return X, Y

###########################
# RNN MODEL
###########################

# Example and label vecs
# Symbolic placeholders for one training sequence. The training loop feeds
# `x` a float32 matrix with one one-hot row per character and `y` an int32
# vector holding the index of the expected next character for each row.
x = T.fmatrix('x')
y = T.ivector('y')

# Data info
seq_length = 25          # characters per training sequence (also the number of scan steps)
indim = chars_size       # one input unit per vocabulary character
hdim = 100               # size of the hidden state
outdim = chars_size      # one output unit per vocabulary character
X, Y = create_trainset(data, seq_length)
# NOTE: from here on data_size is the number of training sequences,
# not the number of characters it was first set to.
data_size = len(X)

# Hyperparameters
learning_rate = 0.1      # step size of the gradient update
epochs = 100             # number of passes over all training sequences
grad_clip = [-5, 5]      # [lower, upper] bounds applied element-wise to the gradients
L1_reg = 0.0             # coefficient of the L1 penalty (0.0 disables it)
L2_reg = 0.0             # coefficient of the L2 penalty (0.0 disables it)

# Parameters
# Uncomment the seed below to make the random initialisation reproducible.
# np.random.seed(256)
# Weight matrices: np.random.random draws uniformly from [0, 1), scaled by 0.01.
W_in = theano.shared(name='w_in', value=np.random.random((hdim,indim)).astype(theano.config.floatX)*0.01)
W_h = theano.shared(name='w_h', value=np.random.random((hdim,hdim)).astype(theano.config.floatX)*0.01)
W_out = theano.shared(name='w_out', value=np.random.random((outdim,hdim)).astype(theano.config.floatX)*0.01)
# Biases: uniform in [0, 1), not scaled down like the weights.
b_h = theano.shared(name='b_h', value=np.random.random((hdim,)).astype(theano.config.floatX))
b_out = theano.shared(name='b_out', value=np.random.random((outdim,)).astype(theano.config.floatX))
# Initial hidden state: starts at zero and, being listed in `params`, is trained too.
h_0 = theano.shared(name='h_0', value=np.zeros((hdim,)).astype(theano.config.floatX))

# Everything the loss is differentiated with respect to.
params = [W_in, W_h, W_out, b_h, b_out, h_0]

def step(x_t, h_tm1):
    """Compute one recurrence of the RNN.

    x_t is the input for the current time step and h_tm1 is the hidden state
    of the previous time step. Returns a two-element list: the new hidden
    state and the softmax over the output units.
    """
    pre_activation = T.dot(W_in, x_t) + T.dot(W_h, h_tm1) + b_h
    hidden = T.tanh(pre_activation)
    logits = T.dot(W_out, hidden) + b_out
    probs = T.nnet.softmax(logits)
    return [hidden, probs]

# Apply step() to the rows of x one after another. The hidden state is
# carried from step to step (starting at h_0); the softmax output is not fed
# back, hence the None in outputs_info.
[h, out], _ = theano.scan(fn=step,
                          sequences=x,
                          outputs_info=[h_0, None],
                          n_steps=seq_length)

# Output
# `out` is indexed as a 3-D tensor; presumably (steps, 1, outdim), because
# softmax returns a one-row matrix for a vector input. Index 0 drops that
# singleton axis, leaving one probability row per time step.
p_y_given_x = out[:, 0, :]
y_pred = T.argmax(p_y_given_x, axis=1)

# Regularization
# The penalties are computed element-wise: L1 is the sum of absolute values
# and L2 the sum of squares. Summing the matrix first would let positive and
# negative weights cancel each other out.
L1 = L1_reg * (abs(W_in).sum() + abs(W_h).sum() + abs(W_out).sum())
L2 = L2_reg * ((W_in ** 2).sum() + (W_h ** 2).sum() + (W_out ** 2).sum())

# Loss
# Mean negative log-likelihood of the target characters, plus the penalties.
# The penalties are added outside the negation so that larger weights
# increase the loss.
loss = -T.mean(T.log(p_y_given_x)[T.arange(x.shape[0]), y]) + L1 + L2

# Gradient (the derivative of the loss w.r.t. the params)
dLossdParams = T.grad(loss, params)
# Clip every gradient element into [grad_clip[0], grad_clip[1]].
dLossdParams_clipped = [T.clip(g, grad_clip[0], grad_clip[1]) for g in dLossdParams]
# Plain gradient-descent update for every parameter.
gradient_updates = OrderedDict((p, p - learning_rate*g) for p, g in zip(params, dLossdParams_clipped))

# Output functions
# train_step: runs one update on a single (x, y) sequence and returns its loss.
train_step = theano.function(inputs=[x, y], outputs=loss, updates=gradient_updates)
# get_hprev: hidden state after the last time step of the given sequence.
get_hprev = theano.function(inputs=[x], outputs=h[-1])

###########################
# SAMPLING CODE
###########################

def sample(h, seed_ix, n):
    """Sample a sequence of character indices from the model.

    Args:
        h: Hidden state to start from, as a column vector of shape (hdim, 1).
        seed_ix: Index of the character fed in at the first time step.
        n: Number of characters to generate.

    Returns:
        A list of ``n`` sampled character indices.
    """
    # Read the current parameter values once; they do not change while
    # sampling, so there is no need to fetch them again on every step.
    w_in = W_in.get_value()
    w_h = W_h.get_value()
    w_out = W_out.get_value()
    bias_h = np.reshape(b_h.get_value(), (-1, 1))
    bias_out = np.reshape(b_out.get_value(), (-1, 1))

    # One-hot column vector for the seed character.
    x = np.zeros((chars_size, 1))
    x[seed_ix] = 1

    ixes = []
    for _ in range(n):
        h = np.tanh(np.dot(w_in, x) + np.dot(w_h, h) + bias_h)
        logits = np.dot(w_out, h) + bias_out

        # Softmax. Subtracting the maximum leaves the result unchanged
        # mathematically but keeps np.exp from overflowing on large logits.
        exp_logits = np.exp(logits - np.max(logits))
        p = exp_logits / np.sum(exp_logits)

        ix = np.random.choice(chars_size, p=p.ravel())

        # The sampled character becomes the input of the next step.
        x = np.zeros((chars_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes

###########################
# TRAINING CODE
###########################

# train_step returns the *mean* negative log-likelihood per character, so the
# loss expected from a model that predicts uniformly is -log(1/chars_size),
# with no seq_length factor.
smooth_loss = -np.log(1.0/chars_size) # loss at iteration 0

for epoch in range(epochs):
    print('EPOCH:\t{}'.format(epoch))

    # The loop variables are deliberately not called x / y / loss, so the
    # symbolic variables of the model are not overwritten.
    for i, (x_seq, y_seq) in enumerate(zip(X, Y), start=1):

        # Convert x_seq (a list of indices) into a matrix with one 1-hot example vector per row
        x_mat = np.zeros((len(x_seq), chars_size), dtype='float32')
        x_mat[np.arange(len(x_seq)), x_seq] = 1

        # Convert y_seq into a vector
        y_vec = np.array(y_seq, dtype='int32')

        # Use x_mat and y_vec to train the RNN
        batch_loss = train_step(x_mat, y_vec)

        # Exponential moving average of the loss, for smoother progress output
        smooth_loss = smooth_loss * 0.999 + batch_loss * 0.001

        # Print progress on the first, every 1000th and the last sequence of the epoch
        if i == 1 or (i % 1000 == 0) or i == data_size:
            print('iter {}, loss: {}'.format(i, smooth_loss))

            # Sample every few iterations. The sample is only generated when
            # it is actually going to be printed.
            if i % 5000 == 0:
                hprev = np.reshape(get_hprev(x_mat), (-1,1)) # turn this into a 2d column vector
                ixes = sample(hprev, x_seq[0], 200)
                print("\n")
                print(''.join(ix_to_char[ix] for ix in ixes), end="")
                print("\n")

Data has 1115393 characters, 65 unique.
EPOCH:	0
iter 1, loss: 104.2594703945225
iter 1000, loss: 40.45898636330465
iter 2000, loss: 16.804994173761436
iter 3000, loss: 7.898309867180023
iter 4000, loss: 4.50954681471479
