# Assignment 11
Using section 8.1 in Deep Learning with Python as a guide, implement an LSTM text generator. Train the model on the Enron corpus or a text source of your choice. Save the model and generate 20 examples to the results directory of dsc650/assignments/assignment11/.

In [3]:
# Download and parse the initial text file
from tensorflow import keras
import numpy as np
path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600901


In [5]:
# Vectorizing sequences of characters
maxlen = 60
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
    
print('Number of sequences:', len(sentences))

chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
        y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 200281
Unique characters: 59
Vectorization...


In [8]:
# Build the network & compile the model
from keras import layers
model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [9]:
# Define a function to sample the next character given the model's predictions
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [24]:
# Fir model and generate text
import random
import sys
for epoch in range(1, 6):
    print('epoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')
    model.save('results/Epoch '+ str(epoch) + ' model.h5')
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        file_name = 'Epoch ' + str(epoch) + ' Temp ' + str(temperature)
        sys.stdout.write(generated_text)
        seed = 'Generated with seed: ' + generated_text
        text_string = generated_text
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)
            text_string += next_char
        lines = [seed, text_string]
        with open ('results/' + file_name + '.txt', 'w') as f:
            for line in lines:
                f.write(line)
                f.write('\n')
                f.write('\n')

epoch 1
--- Generating with seed: "ible
to determine with certainty which is cause and which is"
------ temperature: 0.2
ible
to determine with certainty which is cause and which is the same and a morality of the same and an and his conscience of the same the present a south in and man in the most and the problem of the same the same and with the same and action of the conception of the sense of the sense of the same and the same and heart and the heart of a fact, and an and an and an and of the same the higher the same and without something of the fact of the same and the s------ temperature: 0.5
same and without something of the fact of the same and the sensation of the fact.--the constant say that under health--in order to him who has a consequence-custably to intellectual to man into him that which is in the most sarrifical conduct and heart of a men of the world in fact and an and the subjection of height and master of the old something of the
spirit to sea                        

precisely which has always ambiging. even can truth sympathy to his alse, religiously dombets there will asmelt truth of been sociely "important.

1everthear: as perbacc
is beings." ha" only the exastebs to conducantsumom.

144. which has only that ghelys: hetheribudent. i"(unan
zedancy thre "bew of questioned avoises (which devolding ionepoch 5
--- Generating with seed: "on. such pains are birth pains. the butterfly
insists upon b"
------ temperature: 0.2
on. such pains are birth pains. the butterfly
insists upon belief this spiritual and things and the latter of the sacrifice the same things and superiority of the present in the supersicy of the superstition of the desires to the sense of the more society to the strength of the consequently of the most things of the sense of the philosophers in the sense of the same and some things to the sense of the loves of the results of the spirit of the sense of the ------ temperature: 0.5
 the loves of the results of the spirit of the sense of