In [1]:
#Text Generator Using Recurrent Neural Networks and LSTM
#Real World Applications - Predictive text, writing stories/poems, motivational speeches

In [2]:
#from IPython.display import HTML, display

#def set_css():
#  display(HTML('''
#  <style>
#    pre {
#        white-space: pre-wrap;
#    }
#  </style>
#  '''))
#get_ipython().events.register('pre_run_cell', set_css)

In [3]:
import numpy as np
import random
import sys
import io
import requests
import re
import tensorflow as tf

from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import get_file

In [4]:
# Download the training corpus and normalise it.
# Available texts:
#   animal farm - http://gutenberg.net.au/ebooks01/0100011.txt
#   macbeth     - https://raw.githubusercontent.com/rutum/tf-idf/master/gutenberg/shakespeare-macbeth.txt
# A timeout keeps the cell from hanging forever on a dead connection, and
# raise_for_status() stops the notebook instead of training on an HTTP error page.
raw = requests.get("http://gutenberg.net.au/ebooks01/0100011.txt", timeout=30)
raw.raise_for_status()
raw_text = raw.text
# Lower-casing shrinks the character vocabulary the model has to learn.
processed_text = raw_text.lower()
# Drop every character outside the 7-bit ASCII range.
processed_text = re.sub(r'[^\x00-\x7f]', r'', processed_text)

In [5]:
# Build the character vocabulary (sorted, unique) together with the two
# lookup tables used for encoding (char -> index) and decoding (index -> char).
chars = sorted(set(processed_text))
char_indices = {symbol: position for position, symbol in enumerate(chars)}
indices_char = {position: symbol for position, symbol in enumerate(chars)}
#print('total chars:', len(chars))
#print('corpus length:', len(processed_text))

In [6]:
# Slice the corpus into overlapping training windows ("seeds").
# Each window is `maxlen` characters long, consecutive windows start `step`
# characters apart, and the character right after a window is its target.
maxlen = 75
step = 3
sentences = [
    processed_text[start: start + maxlen]
    for start in range(0, len(processed_text) - maxlen, step)
]
next_chars = [
    processed_text[start + maxlen]
    for start in range(0, len(processed_text) - maxlen, step)
]
#print('nb sequences:', len(sentences))

In [7]:
#sentences

In [8]:
# One-hot encode the training data.
#   x[i, t, c] is True when character index c sits at position t of window i
#   y[i, c]    is True when character index c is the target following window i
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


In [9]:
# Build the model: a single LSTM layer followed by a softmax over the vocabulary.
model = tf.keras.models.Sequential()
# The LSTM consumes one-hot windows of shape (maxlen, len(chars)) and emits only
# its final 128-dimensional state (return_sequences is left at its default).
model.add(LSTM(128, input_shape=(maxlen, len(chars))))

# One output unit per character; softmax turns the outputs into a probability
# distribution over the next character.
model.add(Dense(len(chars), activation='softmax'))

# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept.
optimizer = RMSprop(learning_rate=0.02)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [10]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               92672     
_________________________________________________________________
dense (Dense)                (None, 52)                6708      
Total params: 99,380
Trainable params: 99,380
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Temperature sampling: turn the network's softmax output into one character index.
def sample(preds, temperature=1.0):
    """Draw a single index from a probability vector, reshaped by a temperature.

    The probabilities are moved to log space, divided by ``temperature``,
    re-normalised with a softmax and then sampled once. Temperatures below 1
    sharpen the distribution (more conservative picks); temperatures above 1
    flatten it (more random picks).

    Args:
        preds: 1-D array-like of non-negative probabilities (for example the
            softmax output of the model for one input window).
        temperature: Scaling factor for the log-probabilities. A value of 0 or
            less selects the most likely index deterministically instead of
            dividing by zero.

    Returns:
        The sampled index into ``preds``.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature <= 0:
        # Limit case of an infinitely sharp distribution: greedy decoding.
        return np.argmax(preds)
    # log(0) legitimately yields -inf for impossible characters; silence the warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum so the largest term is exp(0) == 1. Without this, small
    # temperatures underflow every term to 0 and the normalisation becomes 0/0.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    probs = exp_preds / np.sum(exp_preds)
    # A single multinomial trial returns a one-hot row; its argmax is the drawn index.
    probas = np.random.multinomial(1, probs, 1)
    return np.argmax(probas)

In [12]:
# Callback that prints a freshly generated text sample.
def on_epoch_end(epoch, _):
    """Generate and print 300 characters of text from a random corpus seed.

    A window of ``maxlen`` characters is picked at a random position in
    ``processed_text`` and used as the seed. The model then predicts one
    character at a time; each predicted character is appended to the window
    while the oldest character is dropped.

    Args:
        epoch: Epoch number passed in by the caller; only used in the banner.
        _: Second positional argument supplied by the caller; unused.
    """
    print("******************************************************")
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(processed_text) - maxlen - 1)
    for temperature in [0.7]:
        print('----- Temperature:', temperature)

        sentence = processed_text[start_index: start_index + maxlen]
        print('----- Generating with seed: "' + sentence + '"')
        # Emit the header before the text so it gets its own line; the seed is
        # written without a trailing newline so the generated characters
        # continue it directly.
        print("----- Generated Text:")
        sys.stdout.write(sentence)

        for i in range(300):
            # One-hot encode the current window as a batch of size one.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = indices_char[next_index]

            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char

            # Stream each character immediately so progress is visible.
            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()


In [None]:
#import logging, os
#logging.disable(logging.WARNING)
#os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Train the network; the callback prints a generated text sample after each epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(
    x,
    y,
    batch_size=128,
    epochs=50,
    callbacks=[print_callback],
)

Epoch 1/50
----- Generating text after Epoch: 0
----- Temperature: 0.7
----- Generating with seed: "not be with in the lifetime of any animal
now living, but still it was com"
not be with in the lifetime of any animal
now living, but still it was com----- Generated Text:
e in the instered were were gon for anound and the was mo to the for wayichate

the , sed in the the pogs at manch boxer to were to fall warm in the had no lated the farm out the a and to care so when on the hare on the centide out sivey come speased time the filven in the redmaniman rofally
no g
Epoch 2/50
----- Generating text after Epoch: 1
----- Temperature: 0.7
"---- Generating with seed: "title-deeds of animal farm they would ask no questions. moreover, terrible
----- Generated Text: farm they would ask no questions. moreover, terrible

ferted money, but the
inting and them on the pain on greased suveshed the doged the
other calded himan it was a bears no spon ot the
time by the farm. and an olding the sover spor

some of the time, the protest lay it had as to be rool of the site of the five of the farm had in frost the animals wlome on hy should
quest or the farm was dogs and the animals the nole and afterhal neary
and shen as he was no corderiting
Epoch 12/50
----- Generating text after Epoch: 11
----- Temperature: 0.7
----- Generating with seed: "legs good, two legs
bad!" which went on for nearly a quarter of an hour an"
legs good, two legs
bad!" which went on for nearly a quarter of an hour an----- Generated Text:
d many of their happened to be read
animal farm, said their leass of the words, serryary lown read afte the horness of at the animals were been so ferech. and simply not the orchard, which and besects, but the four straied to the cows of the farmhouse. no one worke was a days
boxer and for a strai
Epoch 13/50
----- Generating text after Epoch: 12
----- Temperature: 0.7
----- Generating with seed: "
pointing to the sky with his large beak--"up there, just on the other sid"

pointing