In [2]:
import keras
import numpy as np

path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Corpus length: 600893


In [3]:
# Length of extracted character sequences
maxlen = 60

# Sampling a new character every n steps. 
step = 3

# Dictionary to contain extracted sentences. 
sentences = []

# Dictionary to contain targets. 
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to respective indices in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

# One-hot encoding the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 200278
Unique characters: 57
Vectorization...


In [4]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [5]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [6]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [7]:
import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the validation data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        # Generate 400 characters
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

epoch 1
--- Generating with seed: "ne which above all engenders and
seeks to engender, and anot"
------ temperature: 0.2
ne which above all engenders and
seeks to engender, and another the self--the sour of the same the scholongent of the sourses the sour of the sour the same the sublent the scholous all the signte of the schilond in the sour of the sourself the surption of the scholous and the scholough and the surption the surption of the sensing the proving to the sourse the stright the sour and the proving the scholous as in the sensely and the sensition of the world of 
------ temperature: 0.5
cholous as in the sensely and the sensition of the world of the our of the sense of the the one her the sension of the conscious and and the look of the stand and the suble of this of the arters of the sour is or the wholly intention and full soul of the varry the that ancersion of the sourbard a thinking of the the higher without of the
signters of the propless and disprelied
and the intenc

KeyboardInterrupt: 

In [None]:
# NOTE: Error message occurred due to interruption of kernel, not any code-based errors. 
# This model yielded 23 complete epochs. 