Based on: https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py

In [50]:
import numpy as np
import theano
from theano import tensor as T
import lasagne
from lasagne.layers import *
from lasagne.nonlinearities import *
from lasagne.init import *
from lasagne.updates import *
import urllib2

In [2]:
in_text = urllib2.urlopen('https://s3.amazonaws.com/text-datasets/nietzsche.txt').read()

In [4]:
generation_phrase = "The quick brown fox jumps"

In [7]:
chars = list(set(in_text))
data_size, vocab_size = len(in_text), len(chars)

In [8]:
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

In [25]:
len(char_to_ix.keys())

85

In [11]:
lasagne.random.set_rng(np.random.RandomState(1))

In [13]:
SEQ_LENGTH = 20
N_HIDDEN = 512
LEARNING_RATE = .01
GRAD_CLIP = 100
PRINT_FREQ = 1000
NUM_EPOCHS = 50
BATCH_SIZE = 128

In [14]:
def gen_data(p, batch_size = BATCH_SIZE, data=in_text, return_target=True):
    x = np.zeros((batch_size,SEQ_LENGTH,vocab_size))
    y = np.zeros(batch_size)
    for n in range(batch_size):
        ptr = n
        for i in range(SEQ_LENGTH):
            x[n,i,char_to_ix[data[p+ptr+i]]] = 1.
        if(return_target):
            y[n] = char_to_ix[data[p+ptr+SEQ_LENGTH]]
    return x, np.array(y,dtype='int32')

In [31]:
in_text[0:10]

'PREFACE\n\n\n'

In [41]:
assert gen_data(0)[0][0][0][ char_to_ix["P"] ] == 1.0

----

In [45]:
l_in = InputLayer( (None, None, vocab_size) )
l_forward_1 = LSTMLayer(
    l_in, N_HIDDEN, grad_clipping=GRAD_CLIP, nonlinearity=tanh
)
l_forward_2 = LSTMLayer(
    l_forward_1, N_HIDDEN, grad_clipping=GRAD_CLIP, nonlinearity=tanh
)
l_forward_slice = SliceLayer(l_forward_2, -1, 1)
l_out = DenseLayer(l_forward_slice, num_units=vocab_size,
    W=Normal(), nonlinearity=softmax)

In [47]:
target_values = T.ivector("target_output")

In [48]:
network_output = lasagne.layers.get_output(l_out)
cost = T.nnet.categorical_crossentropy(network_output, target_values).mean()



In [49]:
all_params = lasagne.layers.get_all_params(l_out)

In [51]:
updates = adagrad(cost, all_params, LEARNING_RATE)

In [52]:
train = theano.function(
    [l_in.input_var, target_values],
    cost,
    updates=updates,
    allow_input_downcast=True
)
compute_cost = theano.function(
    [l_in.input_var, target_values],
    cost, 
    allow_input_downcast=True
)

In [53]:
probs = theano.function(
    [l_in.input_var],
    network_output,
    allow_input_downcast=True
)

In [56]:
gen_data(0)[0].shape

(128, 20, 85)

In [57]:
gen_data(1)[0].shape

(128, 20, 85)

```
the quick brown fox

p = 0

batch_size=4
seq_length=4

"the "
"he q"
"e qu"
" qui"

-----

p += SEQ_LENGTH + BATCH_SIZE - 1
therefore p = 0 + 4 + 4 - 1 = 7

p = 7

"ck brown fox"

batch_size=4
seq_length=4

"ck b"
"k br"
" bro"
"brow"
```

In [59]:
p = 0
x, y = gen_data(p)
x.shape, y.shape

((128, 20, 85), (128,))

In [78]:
for it in xrange(data_size * NUM_EPOCHS / BATCH_SIZE):
    avg_cost = 0
    for _ in range(PRINT_FREQ):
        x,y = gen_data(p)
        p += SEQ_LENGTH + BATCH_SIZE - 1 
        if p+BATCH_SIZE+SEQ_LENGTH >= data_size:
            print "carriage return"
            p = 0
        avg_cost += train(x,y)
    print "Epoch {} average loss = {}".\
        format(
            it*1.0*PRINT_FREQ/data_size*BATCH_SIZE, 
            avg_cost / PRINT_FREQ
        )

KeyboardInterrupt: 

In [79]:
for epoch in range(0, 10):
    p = 0
    train_losses = []
    while True:
        x, y = gen_data(p)
        p += SEQ_LENGTH + BATCH_SIZE - 1 
        if p+BATCH_SIZE+SEQ_LENGTH >= data_size:
            break
        train_losses.append( train(x,y) )
    print "Epoch %i, loss %f" % (epoch, np.mean(train_losses))

KeyboardInterrupt: 

In [68]:
in_text[0:10]

'PREFACE\n\n\n'

In [77]:
10 * 2 / 4

5

In [89]:
np.argmax( probs( gen_data(0)[0][0:1] ).ravel() )

59