In [1]:
'''Example script to generate text, character by character, from the WikiText-2 test split.

At least 20 epochs are required before the generated text
starts sounding coherent.

It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.

If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


In [2]:
# Read the whole corpus into memory and lower-case it so the character
# vocabulary stays small.
with io.open('wikitext-2/wiki.test.tokens', encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character, in sorted order, with lookup tables
# in both directions (character -> index and index -> character).
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Cut the text into overlapping windows of `maxlen` characters, taking a new
# window every `step` characters; the character right after each window is
# the one the model is trained to predict.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

corpus length: 1255018
total chars: 94
nb sequences: 418326


In [3]:
print('Vectorization...')
# One-hot encode the training data:
#   x[i, t, k] is True when character t of window i has vocabulary index k
#   y[i, k]    is True when the character following window i has index k
# The builtin `bool` is used as dtype instead of the `np.bool` alias, which
# was deprecated in NumPy 1.20 and removed in 1.24 (AttributeError there).
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


# Build the model: one 128-unit LSTM reading a window of one-hot characters,
# followed by a softmax layer with one output per vocabulary character.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])

# The targets are one-hot rows, hence categorical cross-entropy.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

Vectorization...
Build model...


In [4]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised, then a single index is drawn from the result using NumPy's
    global random state.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Scaling factor. Values below 1 sharpen the distribution,
            values above 1 flatten it. ``0`` selects the most probable index
            deterministically (the limit of the scaling as it approaches 0).

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero would yield inf/nan logits; greedy choice is the
        # well-defined limit of temperature scaling.
        return np.argmax(preds)
    # log(0) is -inf, which correctly maps back to probability 0 below, so
    # the divide-by-zero warning is just noise here.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. This leaves the normalised result
    # unchanged but guarantees at least one exp() term equals 1, so the sum
    # cannot underflow to 0 (which would produce NaN) at small temperatures.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [5]:
def on_epoch_end(epoch, _):
    """Print text generated by the current model; meant to run after each epoch.

    A random window of ``maxlen`` characters from ``text`` is used as the
    seed. For each diversity (sampling temperature) the model then extends
    that same seed by 400 characters, which are streamed to stdout as they
    are produced.

    Args:
        epoch: Epoch index, used only in the printed banner.
        _: Unused second callback argument.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # Pick one seed position per call so every diversity starts from the
    # same text and the outputs are directly comparable.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed = text[start_index: start_index + maxlen]
    vocab_size = len(chars)

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # `window` always holds the most recent characters fed to the model.
        window = seed
        for _step in range(400):
            # One-hot encode the current window as a batch of size 1.
            x_pred = np.zeros((1, maxlen, vocab_size))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            probs = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(probs, diversity)]

            # Slide the window forward by one character.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [6]:
# Print generated text at the end of every epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Training configuration, gathered in one place for easy tweaking.
fit_options = dict(batch_size=128, epochs=15, callbacks=[print_callback])

# Left as a bare expression so the cell still displays the History object.
model.fit(x, y, **fit_options)

Epoch 1/15

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "pproaching legal adulthood . according t"
pproaching legal adulthood . according the first the revelvent to the film that the part of the <unk> , the battle . the for the first the first the first the storm to the original to a the first the first the storm and the first and the state that the first the revelved the place of the war and the southern the southern that the <unk> , the early , and the first the south are <unk> , and the that and <unk> and the <unk> , and the film 
----- diversity: 0.5
----- Generating with seed: "pproaching legal adulthood . according t"
pproaching legal adulthood . according the engre with the canarial to the ensible and the <unk> and artivated which are his record . in the first <unk> made and beltivis to the san and and that the film baster to the frence . the <unk> <unk> of the fallion . the river , the fillt in the <unk> was seasons . the team be in the



 <unk> action , where the market of draft in the <unk> of the <unk> of the <unk> , and the <unk> army the back of the controlle was armour of the <unk> and considering the sent o
----- diversity: 1.0
----- Generating with seed: "bined @-@ force operation , the german a"
bined @-@ force operation , the german about his eastion . in the gulf colinment 140aghich incourry condising <unk> battalion in the nomber hitsea , new jawsing interment the berach night , there adapted by <unk> assoused camery un filaces released in pound put a <unk> from <unk> or argue presented his finally provindedly condedw of the – her attactied of him in fibrastimal producing its officer has orcentable and surrigues on <unk> in 
----- diversity: 1.2
----- Generating with seed: "bined @-@ force operation , the german a"
bined @-@ force operation , the german army univershad sign worehicers . her trock a chrisenning player home twated te0 mmb clarfichlemy in durce ( 215 vpoliurs . boncy lirest shorguace to speanit

ly . when excavated , it was found to be the state , and was a be the <unk> of the <unk> and the <unk> and a second states of the <unk> <unk> of the state . 
 in the command of the <unk> <unk> , the <unk> and the <unk> of the <unk> of the commission of the <unk> to the state of the <unk> of the state , the <unk> was the <unk> <unk> <unk> , and the commission in the support of the <unk> and the strength . 
 the states of the <unk> of the
----- diversity: 0.5
----- Generating with seed: "ly . when excavated , it was found to be"
ly . when excavated , it was found to be a second to the recent of the <unk> probably archadion of the <unk> to the describe and the anstrumber are the region , <unk> company . 
 
 = = = <unk> = = = 
 
 concert , and several command <unk> , <unk> , " <unk> . in the contact . 
 
 = = = = success = = = 
 
 more accept of the states . 
 by <unk> . 
 
 = = = amphibian of the product of the <unk> , and the bock published within the <unk> was
----- diversity: 1.0
-----

med <unk> , who married lucius <unk> <unk> match , critics . it manslarcy e. " few skull 1955 ( pessine on arguers in 29 @.@ 8 metres ( 9 @.@ 8 @.@ 4 mphlors , and a victo <unk> for vatier . by march 1907 florawq boatrral of the they . <unk> but a fifted <unk> , is one abselnned convented transited up the doctic priperated oghald conclus <unk> greic name in the japanese ' opecils of until another on breet oeshinbs at the emperor . <unk>
----- diversity: 1.2
----- Generating with seed: "med <unk> , who married lucius <unk> <un"
med <unk> , who married lucius <unk> <unk> and <unk> <unk> and small of <unk> clear allowed fooreals , 13 stagnings , found <unk> , proordes japan wunckitled the 4th working of he manaysicad bebuey americad receive of twe <unk> @-@ other generations " critic two turret she ccreadewinmed australer ohward 2 @.@ 68 m @-@ small oxral <unk> . on 200 messel hou casses it . old tistem shot to their permast . inrobicid bobralries 4100 . other c
Epoch 12/15

----- Generat

he first time in his career , defeated many company , the section , and the first the <unk> of the <unk> and the <unk> of the section of the section of the <unk> and <unk> and <unk> , and the section of the <unk> and <unk> and <unk> , the section of the <unk> was a section of the <unk> of the <unk> in the official of the <unk> and the <unk> <unk> was not section of the <unk> was sent to the section of the <unk> of the section of the <un
----- diversity: 0.5
----- Generating with seed: "he first time in his career , defeated m"
he first time in his career , defeated maintained xim and the <unk> path of their south as a battalion sate of the late of the morning the moved the <unk> and politics of the command = = = 
 
 the record of the form = = = 
 
 on the of the <unk> , subsequent of the points of the <unk> and japan would critece their us 2012 , 2015 , which as one of the 69 @-@ extension of the <unk> was a movement of on the side , a most servicetic while a
----- diversity: 1.0
-----

<keras.callbacks.History at 0x7f2868092d68>

Note: more epochs result in overfitting.