Implementing Character-Level LSTM text generation

In [1]:
import numpy as np
import keras
from keras.utils import get_file
from keras.layers import LSTM, Dense
from keras.models import Model, Sequential
import random
import sys

In [2]:
path = get_file(
  "nietzsche.txt",
  origin = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt'
)

In [3]:
text = open(path).read().lower()

In [4]:
print(f"Corpus Length: {len(text)}")

Corpus Length: 600893


In [5]:
# extract sequences of 60 characters
maxlen = 60 

# New sequence for every 3 characters
step = 3

# Holds extracted sequences
sentences = []
next_chars = []

In [6]:
for i in range(0, len(text) - maxlen, step):
  sentences.append(text[i:i + maxlen])
  next_chars.append(text[i + maxlen])

In [7]:
print(f"Number of Sequences: {len(sentences)}")

Number of Sequences: 200278


In [8]:
# list of unique characters found in text
chars = sorted(list(set(text)))

In [9]:
print(f"Unique Chars: {chars}")

Unique Chars: ['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ä', 'æ', 'é', 'ë']


In [10]:
# create a dictionary mapping to individual characters
char_dict = dict((char, chars.index(char)) for char in chars)

In [11]:
# Vectorization

x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

In [12]:
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_dict[char]]  = 1
    y[i, char_dict[next_chars[i]]] = 1

In [13]:
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation="softmax")
])

In [14]:
model.compile(
    loss="categorical_crossentropy",
    optimizer="rmsprop"
)

In [15]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    preds = np.exp(preds)
    preds = preds / np.sum(preds)
    probas = np.random.multinomial(1, preds, 1)

    return np.argmax(probas)

In [17]:
for epoch in range(1, 15):
    print('epoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = np.random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print(f"--- Generating with seed: {generated_text}")

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print("----- temperature:", temperature)
        sys.stdout.write(generated_text)

        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_dict[char]] = 1
            
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)

epoch 1
--- Generating with seed: puts itself on the defensive against all that
is sorrowful a
----- temperature: 0.2
puts itself on the defensive against all that
is sorrowful and self-canding the some his of the for the self--in the self-sention of the self the sould and soul and self-senting the self the such and and the some his of the same of the self-sention of the word of the self-and the self-since of the same of the self-stranged the present and the history of the conscience of the self--in the conder and all the self-senting the self-then the conderness of the s----- temperature: 0.5
d all the self-senting the self-then the conderness of the says of the really the ound of the perit of the sill--it is to the
ene the self-the courther and persabon the exersting sen it is as the anterness, in present in all the formurity of the sulficious and the contonce of the filled with the master and mistures the self-can as the pravent of a man which we parts of the preating the philosophe