# Text generation with LSTM

In [None]:
# Reweighting a probability distribution to a different temperature
import numpy as np

def reweight_distribution(original_distribution, temperature=0.5):
    distribution = np.log(original_distribution) / temperature
    distribution = np.exp(distribution)
    return distribution / np.sum(distribution)

## Implementing character-level LSTM text generation

In [1]:
# Preparing the data
import keras
import numpy as np

path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Corpus length: 600893


In [2]:
# Vectorizing sequences of characters
maxlen = 60
step = 3

sentences = []

next_chars = []

for i in range(0, len(text)-maxlen, step):
    sentences.append(text[i:i+maxlen])
    next_chars.append(text[i+maxlen])

print('Number of sequences:', len(sentences))

chars = sorted(set(text))
print('Unique characters:', len(chars))

char_indices = dict((char, chars.index(char)) for char in chars)

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 200278
Unique characters: 58
Vectorization...


In [3]:
# Single-layer LSTM model for next-character prediction
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [4]:
# Function to sample the next character given the model's predictions
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [5]:
# Text-generation loop
import random
import sys

for epoch in range(1, 10):
    print('epoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text)-maxlen-1)
    generated_text = text[start_index:start_index+maxlen]
    print('--- Generating with seed: "' + generated_text + '"')
    
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)
        print()

epoch 1
--- Generating with seed: "e range of man's inner experiences
hitherto attained, the he"
------ temperature: 0.2
e range of man's inner experiences
hitherto attained, the here andthe religion of the all the soul of the senses of the soul to the sense of the incertion of thesensesof the man in the soul to the soul of the soul to the sense of the soul tothe sense the sense of the soul to the soulto the soul andthe soul of the manto the soul ofthe soul to the senses and the soul and the masterof the same the soulin the sumplest the soul which thesuch and the 
------ temperature: 0.5
me the soul in the sumplest the soul which the such and the sumpleter--in the believewhat is is allthat thesith,to thesoul of thefare, to the senses tothe funded every the bawnesto intellomanibiling eventhe stand what to the truthexperence of these understand of the what the been thesenent to the belieft the master, thelast are and to the
farmed the man and the brought the mainty of the sounders of the