In [1]:
import nltk
import numpy as np

# Download the NLTK 'gutenberg' package (NLTK reports it as up-to-date when
# it is already present locally), then import the corpus reader for it.
nltk.download('gutenberg')
from nltk.corpus import gutenberg

# Last expression of the cell: display the ids of all texts in the corpus.
gutenberg.fileids()

[nltk_data] Downloading package gutenberg to
[nltk_data]     C:\Users\voldo\AppData\Roaming\nltk_data...
[nltk_data]   Package gutenberg is already up-to-date!


['austen-emma.txt',
 'austen-persuasion.txt',
 'austen-sense.txt',
 'bible-kjv.txt',
 'blake-poems.txt',
 'bryant-stories.txt',
 'burgess-busterbrown.txt',
 'carroll-alice.txt',
 'chesterton-ball.txt',
 'chesterton-brown.txt',
 'chesterton-thursday.txt',
 'edgeworth-parents.txt',
 'melville-moby_dick.txt',
 'milton-paradise.txt',
 'shakespeare-caesar.txt',
 'shakespeare-hamlet.txt',
 'shakespeare-macbeth.txt',
 'whitman-leaves.txt']

In [2]:
# Concatenate the lower-cased raw text of every corpus file whose id
# contains 'shakespeare'.
text = ''.join(
    gutenberg.raw(file_id).lower()
    for file_id in gutenberg.fileids()
    if 'shakespeare' in file_id
)

# Sorted vocabulary of distinct characters, plus lookup tables in both
# directions (character -> index and index -> character).
chars = sorted(set(text))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))

print('Corpus length: {}\nTotal chars: {}\n'.format(len(text), len(chars)))

Corpus length: 375542
Total chars: 50



In [3]:
# Preview the first 500 characters of the assembled corpus.
print(text[:500])

[the tragedie of julius caesar by william shakespeare 1599]


actus primus. scoena prima.

enter flauius, murellus, and certaine commoners ouer the stage.

  flauius. hence: home you idle creatures, get you home:
is this a holiday? what, know you not
(being mechanicall) you ought not walke
vpon a labouring day, without the signe
of your profession? speake, what trade art thou?
  car. why sir, a carpenter

   mur. where is thy leather apron, and thy rule?
what dost thou with thy best apparrell on


In [4]:
maxlen = 40  # number of characters in each input window
step = 3     # offset between the starts of consecutive windows

# Every start offset that leaves room for a full window plus one target char.
window_starts = range(0, len(text) - maxlen, step)

# sentences[k] is a window of `maxlen` characters; next_chars[k] is the
# single character that immediately follows that window in the corpus.
sentences = [text[start:start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]

print('nb sequences:', len(sentences))

nb sequences: 125168


In [5]:
import numpy as np

# One-hot tensors: X is (sequence, position in window, character index) and
# y is (sequence, character index of the character following the window).
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool_)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool_)

for row, (window, target) in enumerate(zip(sentences, next_chars)):
    # Set the whole window at once: pair each position with its char index.
    positions = np.arange(len(window))
    X[row, positions, [char_indices[c] for c in window]] = True
    y[row, char_indices[target]] = True

## Assemble a char-based LSTM model for generating text

In [6]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
from keras.optimizers import RMSprop


# Single LSTM layer over the one-hot windows, followed by a dense projection
# to vocabulary size and a softmax that yields next-character probabilities.
model = Sequential([
    LSTM(units=128, input_shape=(maxlen, len(chars))),
    Dense(units=len(chars)),
    Activation(activation='softmax'),
])

optimizer = RMSprop(learning_rate=.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               91648     
                                                                 
 dense (Dense)               (None, 50)                6450      
                                                                 
 activation (Activation)     (None, 50)                0         
                                                                 
Total params: 98,098
Trainable params: 98,098
Non-trainable params: 0
_________________________________________________________________


In [9]:
import os

epochs = 10
batch_size = 128

# All artifacts (architecture JSON and weight checkpoints) go to one
# directory. Create it up front so a fresh checkout does not fail with
# FileNotFoundError when the first file is opened for writing.
output_dir = '../src/part_8/char_gen_lstm'
os.makedirs(output_dir, exist_ok=True)

# Persist the architecture separately from the weights.
model_structure = model.to_json()
with open('{}/shakes_lstm_model.json'.format(output_dir), 'w') as json_file:
    json_file.write(model_structure)

# Five training rounds of `epochs` epochs each; the weights are saved after
# every round to a file numbered 1 through 5.
for i in range(5):
    model.fit(X, y,
              batch_size=batch_size,
              epochs=epochs)
    model.save_weights('{}/shakes_lstm_weights_{}.h5'.format(output_dir, i + 1))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
import random


def sample(preds, temperature=1.):
    """Draw one index from a probability vector after temperature rescaling.

    The probabilities are moved to log space, divided by ``temperature``,
    re-normalised with a softmax and then used for a single multinomial draw.
    Temperatures below 1 sharpen the distribution; temperatures above 1
    flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. one softmax output row).
        temperature: Non-zero scaling factor applied to the log-probabilities.

    Returns:
        The index (NumPy integer) of the sampled entry.

    Raises:
        ValueError: If no finite log-score remains after scaling, e.g. when
            every probability is zero or ``temperature`` is 0.
    """
    preds = np.asarray(preds).astype('float64')

    # A zero probability maps to -inf in log space and back to exactly 0 after
    # exp, which is the intended result; only the "divide by zero" warning
    # from np.log is unwanted, so silence it for this expression alone.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature

    peak = np.max(scaled)
    if not np.isfinite(peak):
        raise ValueError(
            'preds must contain a positive probability and temperature must be non-zero')

    # Subtracting the maximum leaves the softmax unchanged but guarantees the
    # largest term is exp(0) == 1, so the normaliser cannot underflow to 0
    # (which would produce NaN probabilities at very small temperatures).
    exp_preds = np.exp(scaled - peak)
    preds = exp_preds / np.sum(exp_preds)

    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [12]:
import sys

# One random seed window is chosen once and reused for every diversity
# (sampling temperature) value, so the generated samples share a prompt.
start_index = random.randint(0, len(text) - maxlen - 1)
for diversity in [.2, .5, 1., 1.2]:
    print('\n----- diversity:', diversity)
    sentence = text[start_index: start_index + maxlen]
    generated = sentence
    print('----- Generaring with seed: "' + sentence + '"')
    sys.stdout.write(generated)

    for _ in range(400):
        # One-hot encode the current window as a batch of one.
        window_onehot = np.zeros((1, maxlen, len(chars)))
        window_onehot[0,
                      np.arange(len(sentence)),
                      [char_indices[c] for c in sentence]] = 1.

        # Predict the next-character distribution and sample from it.
        preds = model.predict(window_onehot, verbose=0)[0]
        next_char = indices_char[sample(preds, diversity)]

        # Slide the window one character forward and stream the output.
        generated += next_char
        sentence = sentence[1:] + next_char
        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()


----- diversity: 0.2
----- Generaring with seed: "stance and course of thought
'tis heauie"
stance and course of thought
'tis heauie and euen the selfe and caesar,
and that i would not made of the treasons,
there is not the expraties the street: i will bee for a shout?
  ham. thou hast macbeth

   ham. the comes the world the selfe,
and the selfe the companie of me the matter?
 

  preds = np.log(preds) / temperature


 ham. the comes the street they made in the compacke,
and the world the companie of her father

   ham. now so but our selues the matter, or marke
with

----- diversity: 0.5
----- Generaring with seed: "stance and course of thought
'tis heauie"
stance and course of thought
'tis heauie and god backe of his true to his say.
enter macbeth

   ham. what are a man like reason'd stands
of marrie seene the remembrance of vs

   ham. i seene the street: now he hath beyent finde,
more the beart and dead, and i see these coniurg'd:
and answer

   ham. how shall be so macbe that shall know new

   hor. i will know these excele his princestis of men,
the seruice like a man be made of the 

----- diversity: 1.0
----- Generaring with seed: "stance and course of thought
'tis heauie"
stance and course of thought
'tis heauie most conuirition. my lord, woe,
drooble this our great strikes?
  bru. thou, as he wood

   3 hamlethy a mendures: thereading beare he forkes, for it holay heauen you marrisight?
i