Text Generation Using LSTM
We use the text of "nietzsche.txt" both to train the model and to seed text generation.

In [17]:
#Importing the required packages

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import GRU
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

In [18]:
# Obtain the corpus through Keras' get_file; the value it returns is used
# below as a local file path.
path = get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# Read the whole file as UTF-8 and lowercase it, so upper- and lower-case
# letters map to the same symbol in the vocabulary built later.
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()
print('corpus length:', len(text))

corpus length: 600893


In [19]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
print('total chars:', len(chars))
# Lookup tables between a character and its position in `chars`.
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

total chars: 57


In [20]:
# Sanity check: show the full vocabulary and the corpus length in characters.
print(chars)
print(len(text))

['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ä', 'æ', 'é', 'ë']
600893


In [21]:
# Cut the text into semi-redundant (overlapping) windows of `maxlen`
# characters, taking a new window every `step` characters.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
# Each training input is a window of `maxlen` consecutive characters ...
sentences = [text[start: start + maxlen] for start in window_starts]
# ... and its target is the single character that follows that window.
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 200285


In [22]:
print('Vectorization...')
# One-hot encode the data:
#   x[i, t, c] is True when character t of window i is vocabulary entry c;
#   y[i, c]    is True when the character following window i is entry c.
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24, where it raises
# AttributeError. The resulting arrays are the same boolean arrays as before.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
print(x.shape)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
print(y.shape)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...
(200285, 40, 57)
(200285, 57)


In [23]:
# Build the model: one 128-unit LSTM layer reading windows of shape
# (maxlen, len(chars)), followed by a softmax over the vocabulary.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])

# NOTE(review): newer Keras releases name this argument `learning_rate`;
# confirm against the installed Keras version before changing it.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

Build model...


In [15]:
def sample(preds, temperature=1.0):
    """Draw one index at random from a probability array.

    The probabilities are re-weighted by `temperature` before sampling:
    each one is mapped to exp(log(p) / temperature) and the result is
    renormalised to sum to 1. Values below 1.0 sharpen the distribution,
    values above 1.0 flatten it.

    Args:
        preds: array-like of probabilities (converted to float64).
        temperature: divisor applied to the log-probabilities.

    Returns:
        The index selected by a single multinomial draw.
    """
    scaled_log_probs = np.log(np.asarray(preds).astype('float64')) / temperature
    weights = np.exp(scaled_log_probs)
    distribution = weights / np.sum(weights)
    # A single one-hot draw; argmax recovers the position of the 1.
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)


def on_epoch_end(epoch, _):
    """Print text generated by the current model at the end of an epoch.

    A random window of `maxlen` characters from `text` serves as the seed.
    For each diversity (sampling temperature) value, 400 characters are
    generated one at a time: the current window is one-hot encoded, passed
    to `model.predict`, the next character is chosen with `sample`, and the
    window slides forward by one character.

    Args:
        epoch: epoch index, used only in the printed header.
        _: second callback argument; unused.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # The same seed is reused for every diversity so the outputs are comparable.
    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        window = seed
        for _step in range(400):
            # One-hot encode the current window as a batch of size 1.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(window):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Drop the oldest character and append the newly generated one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# Register on_epoch_end as the callback's end-of-epoch hook so sample text is
# printed while training runs.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Train on the one-hot tensors x (input windows) and y (next characters).
model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])

Epoch 1/60

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "
a protection against it.

294. the olym"

a protection against it.

294. the olyme the the it is a self-and an a the something the something the suphing the been the the an the
it is the the been the were the present of the an all the the as a perience of the an the the an into a the the something the subject of the the the as a the the an all the an all the the as a the the in the the an all the the as a the an all the perience that the an the an the an all the in the presenc
----- diversity: 0.5
----- Generating with seed: "
a protection against it.

294. the olym"

a protection against it.

294. the olyme, and the order the from all the the the rear a some that as a dearing being that is in the the an existent of atting the is agefical as a truth that the an the the or the subolation of the arteration of speads that gening among that it in the longer fantatic the it would" the world t

KeyboardInterrupt: 

In [24]:
# Same experiment with a GRU: one 128-unit GRU layer reading windows of shape
# (maxlen, len(chars)), followed by a softmax over the vocabulary.
# This rebinds the global `model`, which on_epoch_end reads when generating text.
print('Build model...')
model = Sequential([
    GRU(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])

# NOTE(review): newer Keras releases name this argument `learning_rate`;
# confirm against the installed Keras version before changing it.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

Build model...


In [25]:
# Register on_epoch_end as the callback's end-of-epoch hook so sample text is
# printed while training runs.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Train whichever network `model` currently refers to on the one-hot tensors
# x (input windows) and y (next characters).
model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])

Epoch 1/60

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "w run back in all directions, we ourselv"
w run back in all directions, we ourselves of the conding the reperse of the religions of all as of the is the religions of the world of the relight of the religions of the soul not of the is the relight of their the sould of the more is and and soul for has the relitions of the conding of the is the sound of the is the relight of the religions of the relight of his of the relight or is the reclustition of the relight of the religions o
----- diversity: 0.5
----- Generating with seed: "w run back in all directions, we ourselv"
w run back in all directions, we ourselves the said his ord of the is not be out for the sort, and for his the recindon of he will to not of their not of the histored, which to their is always as so their is he sould his of the sections of consequencling wiment is and of the more of more of his of the for a sore is a who rel

of its barresse upon of the witlinue  there if but with pecires somit excern of want incomes oneself it whises one
only seeved alarse of
all poind the wonmed same, womer quemitand so who noo whis all dool are is pesprine the fore free-distannsean, whet maracy "the negried nown tedlionally "wass,
acconners, for the milest the sielened
a
----- diversity: 1.2
----- Generating with seed: "hat occurred to the
mind with reference "
hat occurred to the
mind with reference with ackn as  very amore ser intenterinatenopaliful they that was if ju ctrajactly who relowed,s, spiret of
we indjeindent toge grown tranging?



15
. evenious offut new races no powemout as kitwer of acdking
will
thresy nechy, whhinh rading. the really fortheocick
is jeain, "hewhis holet, in cownor un-egoised connectery and of truent
plabapinience, all ermonodue speelening stutaptingadness, wh u
Epoch 5/60

----- Generating text after Epoch: 4
----- diversity: 0.2
----- Generating with seed: "rificing resolutions and abneg

apparently (t aisind  aneedeethesdeatnee tienceane nenditede thene neinie ee an theneniein ae meneedenahendifnee ahe ince thendiin e naithe  aianae he hende tnthee  e n ith hei e ndeinie aee ande t  ddenees inthaea ne an ene hesdes  ande haninenhid in ha a ian heene ai en tendeath indeane die ania  a pe he hends ce hene aheeaiea nhhenhe angheatheaieiiin ind nen dean inhaidthe andeit ne iend andieeahended andia
----- diversity: 0.5
----- Generating with seed: "n heaven and in earth" is,
apparently (t"
n heaven and in earth" is,
apparently (tf as med wiahadescatha athiniiiine inamhadd thnei a thhieitinivethahestineisicin alidecn a dadaned ao oelieddenthioed a as de inaeenidifs , an cn heathicd iesndenath  tneiien iai iteneinhdeisoa pacd f iy a ane aylhhareealeheidee apniamaeedi fisids tinan weehicangaihfn ia e s tiit ae ne ade h wh tisge ofieinhid deineid ihenihe neineithen  faheinghae aeis  assieisn snintheheefhdpe gioith ergee anini
----- diversity: 1.0
----- Generating with seed: "n h

KeyboardInterrupt: 