In [2]:
import tensorflow.keras as keras
import numpy as np

path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600893


In [4]:
max_len = 60
steps = 3

In [6]:
sentences = []
next_chars = []

for i in range(0, len(text)-max_len, steps):
    sentences.append(text[i:i+max_len])
    next_chars.append(text[i+max_len])

In [9]:
print("The number of sequences is,", len(sentences))

The number of sequences is, 200278


In [10]:
chars = sorted(list(set(text)))

In [11]:
print('Unique chars:', len(chars))

Unique chars: 57


In [17]:
char_indices = dict((char, chars.index(char)) for char in chars)

In [18]:
print('Vectorization...')
x = np.zeros((len(sentences), max_len, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


Vectorization...


In [21]:
from tensorflow.keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(max_len, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [22]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

## Training the language model and sampling from it

Given a trained model and a seed text snippet, you can generate new text by doing the following repeatedly:

* Draw from the model a probability distribution for the next character, given the generated text available so far.
* Reweight the distribution to a certain temperature.
* Sample the next character at random according to the reweighted distribution.
* Add the new character at the end of the available text.

This is the code you use to reweight the original probability distribution coming out of the model and draw a character index from it (the sampling function).

In [23]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [25]:
import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text) - max_len - 1)
    generated_text = text[start_index: start_index + max_len]
    print('--- Generating with seed: "' + generated_text + '"')
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        for i in range(400):
            sampled = np.zeros((1, max_len, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
			

epoch 1
--- Generating with seed: "r lie in
a lonely moor? it is impossible to think of mortal "
------ temperature: 0.2
r lie in
a lonely moor? it is impossible to think of mortal and all strange the soul the spirits of the soul the sense and also the such and them and more and the strength and the most condection of the condection of the sout the enders of the strange the self and self and self-personal and the self and the delight and like the strange of the strange the condection of the strange the strength the same the same the spirit and also is are the present--in the------ temperature: 0.5
same the same the spirit and also is are the present--in the most does be as one of
the pholosophy and self-and spirit. that it is to the same and in which which is the such the soul,
and
which he is not one word and south them the spirit that that which has the good
the spirit and life of delight one higher and the fact them and all and strange to the one does not present she every and dispo

KeyboardInterrupt: 